134 files changed, 10294 insertions, 1431 deletions
diff --git a/Makefile.rules b/Makefile.rules index 7f298a995422..9a6280bf7f24 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -1612,6 +1612,11 @@ $(ObjDir)/%GenIntrinsics.inc.tmp : %.td $(ObjDir)/.dir  	$(Echo) "Building $(<F) intrinsics information with tblgen"  	$(Verb) $(TableGen) -gen-tgt-intrinsic -o $(call SYSPATH, $@) $< +$(ObjDir)/ARMGenDecoderTables.inc.tmp : ARM.td $(ObjDir)/.dir +	$(Echo) "Building $(<F) decoder tables with tblgen" +	$(Verb) $(TableGen) -gen-arm-decoder -o $(call SYSPATH, $@) $< + +  clean-local::  	-$(Verb) $(RM) -f $(INCFiles) diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 8470e8356bce..5f6304b1fcd7 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -221,15 +221,35 @@ License, a "BSD-style" license.</p>  <!--=========================================================================-->  <div class="doc_subsection"> -<a name="dragonegg">DragonEgg: GCC-4.5 as an LLVM frontend</a> +<a name="dragonegg">DragonEgg: llvm-gcc ported to gcc-4.5</a>  </div>  <div class="doc_text">  <p> -The goal of <a href="http://dragonegg.llvm.org/">DragonEgg</a> is to make -gcc-4.5 act like llvm-gcc without requiring any gcc modifications whatsoever. -<a href="http://dragonegg.llvm.org/">DragonEgg</a> is a shared library (dragonegg.so) -that is loaded by gcc at runtime.  It ... +<a href="http://dragonegg.llvm.org/">DragonEgg</a> is a port of llvm-gcc to +gcc-4.5.  Unlike llvm-gcc, which makes many intrusive changes to the underlying +gcc-4.2 code, dragonegg in theory does not require any gcc-4.5 modifications +whatsoever (currently one small patch is needed).  This is thanks to the new +<a href="http://gcc.gnu.org/wiki/plugins">gcc plugin architecture</a>, which +makes it possible to modify the behaviour of gcc at runtime by loading a plugin, +which is nothing more than a dynamic library which conforms to the gcc plugin +interface.  DragonEgg is a gcc plugin that causes the LLVM optimizers to be run +instead of the gcc optimizers, and the LLVM code generators instead of the gcc +code generators, just like llvm-gcc.  To use it, you add +"-fplugin=path/dragonegg.so" to the gcc-4.5 command line, and gcc-4.5 magically +becomes llvm-gcc-4.5! +</p> + +<p> +DragonEgg is still a work in progress.  Currently C works very well, while C++, +Ada and Fortran work fairly well.  All other languages either don't work at all, +or only work poorly.  For the moment only the x86-32 and x86-64 targets are +supported, and only on linux. +</p> + +<p> +DragonEgg has not yet been released.  Once gcc-4.5 has been released, dragonegg +will probably be released as part of the following LLVM release.  
</p>  </div> @@ -1058,7 +1078,7 @@ lists</a>.</p>    src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>    <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> -  Last modified: $Date: 2010-04-01 03:53:24 +0200 (Thu, 01 Apr 2010) $ +  Last modified: $Date: 2010-04-02 11:23:15 +0200 (Fri, 02 Apr 2010) $  </address>  </body> diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h index 4e8c4c85bfe4..d68bfa3c0dfe 100644 --- a/include/llvm/Analysis/DebugInfo.h +++ b/include/llvm/Analysis/DebugInfo.h @@ -31,8 +31,6 @@ namespace llvm {    class Type;    class Value;    class DbgDeclareInst; -  class DebugLoc; -  struct DebugLocTracker;    class Instruction;    class MDNode;    class LLVMContext; @@ -710,11 +708,6 @@ namespace llvm {                         std::string &Type, unsigned &LineNo, std::string &File,                         std::string &Dir); -  /// ExtractDebugLocation - Extract debug location information -  /// from DILocation. -  DebugLoc ExtractDebugLocation(DILocation &Loc, -                                DebugLocTracker &DebugLocInfo); -    /// getDISubprogram - Find subprogram that is enclosing this scope.    DISubprogram getDISubprogram(MDNode *Scope); diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index 10ff1033dcef..6f77d019b691 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -31,6 +31,10 @@ public:    explicit LoopPass(intptr_t pid) : Pass(PT_Loop, pid) {}    explicit LoopPass(void *pid) : Pass(PT_Loop, pid) {} +  /// getPrinterPass - Get a pass to print the function corresponding +  /// to a Loop. +  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; +    // runOnLoop - This method should be implemented by the subclass to perform    // whatever action is necessary for the specified Loop.    virtual bool runOnLoop(Loop *L, LPPassManager &LPM) = 0; diff --git a/include/llvm/Assembly/PrintModulePass.h b/include/llvm/Assembly/PrintModulePass.h index fb4f6a7e13ea..239fbcc0c8ca 100644 --- a/include/llvm/Assembly/PrintModulePass.h +++ b/include/llvm/Assembly/PrintModulePass.h @@ -27,7 +27,9 @@ namespace llvm {    /// createPrintModulePass - Create and return a pass that writes the    /// module to the specified raw_ostream. -  ModulePass *createPrintModulePass(raw_ostream *OS, bool DeleteStream=false); +  ModulePass *createPrintModulePass(raw_ostream *OS, +                                    bool DeleteStream=false, +                                    const std::string &Banner = "");    /// createPrintFunctionPass - Create and return a pass that prints    /// functions to the specified raw_ostream as they are processed. diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h index e358f91f90ad..bf5874f6824a 100644 --- a/include/llvm/BasicBlock.h +++ b/include/llvm/BasicBlock.h @@ -131,6 +131,12 @@ public:    const Instruction* getFirstNonPHI() const {      return const_cast<BasicBlock*>(this)->getFirstNonPHI();    } + +  // Same as above, but also skip debug intrinsics. +  Instruction* getFirstNonPHIOrDbg(); +  const Instruction* getFirstNonPHIOrDbg() const { +    return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbg(); +  }    /// removeFromParent - This method unlinks 'this' from the containing    /// function, but does not delete it. 
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index a980df811040..de9b64d4a46c 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -240,7 +240,10 @@ namespace bitc {      // new select on i1 or [N x i1]      FUNC_CODE_INST_VSELECT     = 29, // VSELECT:    [ty,opval,opval,predty,pred]      FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands] -    FUNC_CODE_INST_INDIRECTBR  = 31  // INDIRECTBR: [opty, op0, op1, ...] +    FUNC_CODE_INST_INDIRECTBR  = 31, // INDIRECTBR: [opty, op0, op1, ...] +     +    FUNC_CODE_DEBUG_LOC        = 32, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal] +    FUNC_CODE_DEBUG_LOC_AGAIN  = 33  // DEBUG_LOC_AGAIN    };  } // End bitc namespace  } // End llvm namespace diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/CallGraphSCCPass.h index feab7637969d..37a454e07aab 100644 --- a/include/llvm/CallGraphSCCPass.h +++ b/include/llvm/CallGraphSCCPass.h @@ -35,6 +35,10 @@ struct CallGraphSCCPass : public Pass {    explicit CallGraphSCCPass(intptr_t pid) : Pass(PT_CallGraphSCC, pid) {}    explicit CallGraphSCCPass(void *pid) : Pass(PT_CallGraphSCC, pid) {} +  /// createPrinterPass - Get a pass that prints the Module +  /// corresponding to a CallGraph. +  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; +    /// doInitialization - This method is called before the SCC's of the program    /// has been processed, allowing the pass to do initialization as necessary.    virtual bool doInitialization(CallGraph &CG) { diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index ffeb44da7bc8..a405932beadb 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -19,6 +19,7 @@  #include "llvm/CodeGen/MachineFunctionPass.h"  #include "llvm/Support/DebugLoc.h"  #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h"  namespace llvm {    class BlockAddress; diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index 76ec9db5510d..59b171850d0d 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -26,7 +26,6 @@  namespace llvm { -class DILocation;  class Value;  class Function;  class MachineRegisterInfo; @@ -112,9 +111,6 @@ class MachineFunction {    // of a function.    DebugLoc DefaultDebugLoc; -  // Tracks debug locations. -  DebugLocTracker DebugLocInfo; -    /// FunctionNumber - This provides a unique ID for each function emitted in    /// this translation unit.    /// @@ -402,9 +398,6 @@ public:    // Debug location.    // -  /// getDILocation - Get the DILocation for a given DebugLoc object. -  DILocation getDILocation(DebugLoc DL) const; -    /// getDefaultDebugLoc - Get the default debug location for the machine    /// function.    DebugLoc getDefaultDebugLoc() const { return DefaultDebugLoc; } @@ -412,9 +405,6 @@ public:    /// setDefaultDebugLoc - Get the default debug location for the machine    /// function.    void setDefaultDebugLoc(DebugLoc DL) { DefaultDebugLoc = DL; } - -  /// getDebugLocInfo - Get the debug info location tracker. 
-  DebugLocTracker &getDebugLocInfo() { return DebugLocInfo; }  };  //===--------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h index bac110316d4f..1a2b12972aba 100644 --- a/include/llvm/CodeGen/MachineFunctionPass.h +++ b/include/llvm/CodeGen/MachineFunctionPass.h @@ -34,6 +34,9 @@ protected:    explicit MachineFunctionPass(intptr_t ID) : FunctionPass(ID) {}    explicit MachineFunctionPass(void *ID) : FunctionPass(ID) {} +  /// createPrinterPass - Get a machine function printer pass. +  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; +    /// runOnMachineFunction - This method must be overloaded to perform the    /// desired machine code transformation or analysis.    /// diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index d84f882bcd1b..fa819275271a 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -16,12 +16,13 @@  #ifndef LLVM_CODEGEN_MACHINEINSTR_H  #define LLVM_CODEGEN_MACHINEINSTR_H -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/ilist_node.h" -#include "llvm/ADT/STLExtras.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/Target/TargetInstrDesc.h"  #include "llvm/Target/TargetOpcodes.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/DenseMapInfo.h"  #include "llvm/Support/DebugLoc.h"  #include <vector> diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index d446eaeb7f49..d610390b6357 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -37,6 +37,7 @@  #include "llvm/CodeGen/MachineLocation.h"  #include "llvm/MC/MCContext.h"  #include "llvm/Support/Dwarf.h" +#include "llvm/Support/DebugLoc.h"  #include "llvm/Support/ValueHandle.h"  #include "llvm/System/DataTypes.h"  #include "llvm/ADT/DenseMap.h" @@ -156,8 +157,8 @@ class MachineModuleInfo : public ImmutablePass {  public:    static char ID; // Pass identification, replacement for typeid -  typedef std::pair<unsigned, TrackingVH<MDNode> > UnsignedAndMDNodePair; -  typedef SmallVector< std::pair<TrackingVH<MDNode>, UnsignedAndMDNodePair>, 4> +  typedef std::pair<unsigned, DebugLoc> UnsignedDebugLocPair; +  typedef SmallVector<std::pair<TrackingVH<MDNode>, UnsignedDebugLocPair>, 4>      VariableDbgInfoMapTy;    VariableDbgInfoMapTy VariableDbgInfo; @@ -330,10 +331,10 @@ public:    /// of one is required to emit exception handling info.    Function *getPersonality() const; -  /// setVariableDbgInfo - Collect information used to emit debugging information -  /// of a variable. -  void setVariableDbgInfo(MDNode *N, unsigned Slot, MDNode *Scope) { -    VariableDbgInfo.push_back(std::make_pair(N, std::make_pair(Slot, Scope))); +  /// setVariableDbgInfo - Collect information used to emit debugging +  /// information of a variable. 
+  void setVariableDbgInfo(MDNode *N, unsigned Slot, DebugLoc Loc) { +    VariableDbgInfo.push_back(std::make_pair(N, std::make_pair(Slot, Loc)));    }    VariableDbgInfoMapTy &getVariableDbgInfo() {  return VariableDbgInfo;  } diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 911be223b21a..d3c2720c15f5 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -21,6 +21,7 @@  namespace llvm {    class FunctionPass; +  class MachineFunctionPass;    class PassInfo;    class TargetLowering;    class RegisterCoalescer; @@ -36,8 +37,9 @@ namespace llvm {    /// MachineFunctionPrinter pass - This pass prints out the machine function to    /// the given stream, as a debugging tool. -  FunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, -                                                 const std::string &Banner =""); +  MachineFunctionPass * +  createMachineFunctionPrinterPass(raw_ostream &OS, +                                   const std::string &Banner ="");    /// MachineLoopInfo pass - This pass is a loop analysis pass.    ///  diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 610edb6fcfb1..ef5d7e2a8959 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -461,8 +461,7 @@ public:    SDValue getCALLSEQ_START(SDValue Chain, SDValue Op) {      SDVTList VTs = getVTList(MVT::Other, MVT::Flag);      SDValue Ops[] = { Chain,  Op }; -    return getNode(ISD::CALLSEQ_START, DebugLoc::getUnknownLoc(), -                   VTs, Ops, 2); +    return getNode(ISD::CALLSEQ_START, DebugLoc(), VTs, Ops, 2);    }    /// getCALLSEQ_END - Return a new CALLSEQ_END node, which always must have a @@ -476,20 +475,19 @@ public:      Ops.push_back(Op1);      Ops.push_back(Op2);      Ops.push_back(InFlag); -    return getNode(ISD::CALLSEQ_END, DebugLoc::getUnknownLoc(), NodeTys, -                   &Ops[0], +    return getNode(ISD::CALLSEQ_END, DebugLoc(), NodeTys, &Ops[0],                     (unsigned)Ops.size() - (InFlag.getNode() == 0 ? 1 : 0));    }    /// getUNDEF - Return an UNDEF node.  UNDEF does not have a useful DebugLoc.    SDValue getUNDEF(EVT VT) { -    return getNode(ISD::UNDEF, DebugLoc::getUnknownLoc(), VT); +    return getNode(ISD::UNDEF, DebugLoc(), VT);    }    /// getGLOBAL_OFFSET_TABLE - Return a GLOBAL_OFFSET_TABLE node.  This does    /// not have a useful DebugLoc.    SDValue getGLOBAL_OFFSET_TABLE(EVT VT) { -    return getNode(ISD::GLOBAL_OFFSET_TABLE, DebugLoc::getUnknownLoc(), VT); +    return getNode(ISD::GLOBAL_OFFSET_TABLE, DebugLoc(), VT);    }    /// getNode - Gets or creates the specified node. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 4dcf0135a474..782d354bdfe7 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1569,8 +1569,7 @@ public:  #else    explicit HandleSDNode(SDValue X)  #endif -    : SDNode(ISD::HANDLENODE, DebugLoc::getUnknownLoc(), -             getSDVTList(MVT::Other)) { +    : SDNode(ISD::HANDLENODE, DebugLoc(), getSDVTList(MVT::Other)) {      InitOperands(&Op, X);    }    ~HandleSDNode(); @@ -1801,7 +1800,7 @@ class ConstantSDNode : public SDNode {    friend class SelectionDAG;    ConstantSDNode(bool isTarget, const ConstantInt *val, EVT VT)      : SDNode(isTarget ? 
ISD::TargetConstant : ISD::Constant, -             DebugLoc::getUnknownLoc(), getSDVTList(VT)), Value(val) { +             DebugLoc(), getSDVTList(VT)), Value(val) {    }  public: @@ -1825,7 +1824,7 @@ class ConstantFPSDNode : public SDNode {    friend class SelectionDAG;    ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)      : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, -             DebugLoc::getUnknownLoc(), getSDVTList(VT)), Value(val) { +             DebugLoc(), getSDVTList(VT)), Value(val) {    }  public: @@ -1896,7 +1895,7 @@ class FrameIndexSDNode : public SDNode {    friend class SelectionDAG;    FrameIndexSDNode(int fi, EVT VT, bool isTarg)      : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex, -      DebugLoc::getUnknownLoc(), getSDVTList(VT)), FI(fi) { +      DebugLoc(), getSDVTList(VT)), FI(fi) {    }  public: @@ -1915,7 +1914,7 @@ class JumpTableSDNode : public SDNode {    friend class SelectionDAG;    JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF)      : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, -      DebugLoc::getUnknownLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { +      DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {    }  public: @@ -1941,7 +1940,7 @@ class ConstantPoolSDNode : public SDNode {    ConstantPoolSDNode(bool isTarget, Constant *c, EVT VT, int o, unsigned Align,                       unsigned char TF)      : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, -             DebugLoc::getUnknownLoc(), +             DebugLoc(),               getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {      assert((int)Offset >= 0 && "Offset is too large");      Val.ConstVal = c; @@ -1949,7 +1948,7 @@ class ConstantPoolSDNode : public SDNode {    ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,                       EVT VT, int o, unsigned Align, unsigned char TF)      : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, -             DebugLoc::getUnknownLoc(), +             DebugLoc(),               getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {      assert((int)Offset >= 0 && "Offset is too large");      Val.MachineCPVal = v; @@ -1997,8 +1996,7 @@ class BasicBlockSDNode : public SDNode {    /// blocks out of order when they're jumped to, which makes it a bit    /// harder.  Let's see if we need it first.    explicit BasicBlockSDNode(MachineBasicBlock *mbb) -    : SDNode(ISD::BasicBlock, DebugLoc::getUnknownLoc(), -             getSDVTList(MVT::Other)), MBB(mbb) { +    : SDNode(ISD::BasicBlock, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb) {    }  public: @@ -2044,8 +2042,7 @@ class SrcValueSDNode : public SDNode {    friend class SelectionDAG;    /// Create a SrcValue for a general value.    explicit SrcValueSDNode(const Value *v) -    : SDNode(ISD::SRCVALUE, DebugLoc::getUnknownLoc(), -             getSDVTList(MVT::Other)), V(v) {} +    : SDNode(ISD::SRCVALUE, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}  public:    /// getValue - return the contained Value. 
@@ -2062,8 +2059,7 @@ class RegisterSDNode : public SDNode {    unsigned Reg;    friend class SelectionDAG;    RegisterSDNode(unsigned reg, EVT VT) -    : SDNode(ISD::Register, DebugLoc::getUnknownLoc(), -             getSDVTList(VT)), Reg(reg) { +    : SDNode(ISD::Register, DebugLoc(), getSDVTList(VT)), Reg(reg) {    }  public: @@ -2081,7 +2077,7 @@ class BlockAddressSDNode : public SDNode {    friend class SelectionDAG;    BlockAddressSDNode(unsigned NodeTy, EVT VT, BlockAddress *ba,                       unsigned char Flags) -    : SDNode(NodeTy, DebugLoc::getUnknownLoc(), getSDVTList(VT)), +    : SDNode(NodeTy, DebugLoc(), getSDVTList(VT)),               BA(ba), TargetFlags(Flags) {    }  public: @@ -2119,8 +2115,7 @@ class ExternalSymbolSDNode : public SDNode {    friend class SelectionDAG;    ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT)      : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, -             DebugLoc::getUnknownLoc(), -             getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) { +             DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {    }  public: @@ -2138,8 +2133,8 @@ class CondCodeSDNode : public SDNode {    ISD::CondCode Condition;    friend class SelectionDAG;    explicit CondCodeSDNode(ISD::CondCode Cond) -    : SDNode(ISD::CONDCODE, DebugLoc::getUnknownLoc(), -             getSDVTList(MVT::Other)), Condition(Cond) { +    : SDNode(ISD::CONDCODE, DebugLoc(), getSDVTList(MVT::Other)), +      Condition(Cond) {    }  public: @@ -2296,8 +2291,8 @@ class VTSDNode : public SDNode {    EVT ValueType;    friend class SelectionDAG;    explicit VTSDNode(EVT VT) -    : SDNode(ISD::VALUETYPE, DebugLoc::getUnknownLoc(), -             getSDVTList(MVT::Other)), ValueType(VT) { +    : SDNode(ISD::VALUETYPE, DebugLoc(), getSDVTList(MVT::Other)), +      ValueType(VT) {    }  public: diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index dd4caba1e568..caefdf4489df 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -22,11 +22,11 @@  #ifndef LLVM_CODEGEN_SLOTINDEXES_H  #define LLVM_CODEGEN_SLOTINDEXES_H -#include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/SmallVector.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/DenseMap.h"  #include "llvm/Support/Allocator.h"  #include "llvm/Support/ErrorHandling.h" diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h index 13331e60ed2f..0b772b0aae44 100644 --- a/include/llvm/Instruction.h +++ b/include/llvm/Instruction.h @@ -32,7 +32,7 @@ class Instruction : public User, public ilist_node<Instruction> {    Instruction(const Instruction &);        // Do not implement    BasicBlock *Parent; -  NewDebugLoc DbgLoc;                      // 'dbg' Metadata cache. +  DebugLoc DbgLoc;                         // 'dbg' Metadata cache.    enum {      /// HasMetadataBit - This is a bit stored in the SubClassData field which @@ -181,10 +181,10 @@ public:    }    /// setDebugLoc - Set the debug location information for this instruction. -  void setDebugLoc(const NewDebugLoc &Loc) { DbgLoc = Loc; } +  void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }    /// getDebugLoc - Return the debug location for this node as a DebugLoc. 
-  const NewDebugLoc &getDebugLoc() const { return DbgLoc; } +  const DebugLoc &getDebugLoc() const { return DbgLoc; }  private:    /// hasMetadataHashEntry - Return true if we have an entry in the on-the-side diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 25169b4597d7..a48a1ed27741 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -782,7 +782,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".  // Advanced Encryption Standard (AES) Instructions  let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".    def int_x86_aesni_aesimc          : GCCBuiltin<"__builtin_ia32_aesimc128">, -              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], +              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty],                          [IntrNoMem]>;    def int_x86_aesni_aesenc          : GCCBuiltin<"__builtin_ia32_aesenc128">,                Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], @@ -797,7 +797,7 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".                Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],                          [IntrNoMem]>;    def int_x86_aesni_aeskeygenassist :  -              GCCBuiltin<"__builtin_ia32_aeskeygenassist">, +              GCCBuiltin<"__builtin_ia32_aeskeygenassist128">,                Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty],                          [IntrNoMem]>;  } diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index 8fc3a5307b3a..8d0c47d7bbe3 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -30,7 +30,9 @@  #define LLVM_PASS_H  #include "llvm/System/DataTypes.h" +  #include <cassert> +#include <string>  #include <utility>  #include <vector> @@ -120,6 +122,11 @@ public:    virtual void print(raw_ostream &O, const Module *M) const;    void dump() const; // dump - Print to stderr. +  /// createPrinterPass - Get a Pass appropriate to print the IR this +  /// pass operates one (Module, Function or MachineFunction). +  virtual Pass *createPrinterPass(raw_ostream &O, +                                  const std::string &Banner) const = 0; +    /// Each pass is responsible for assigning a pass manager to itself.    /// PMS is the stack of available pass manager.     virtual void assignPassManager(PMStack &,  @@ -233,6 +240,9 @@ public:  ///  class ModulePass : public Pass {  public: +  /// createPrinterPass - Get a module printer pass. +  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; +    /// runOnModule - Virtual method overriden by subclasses to process the module    /// being operated on.    virtual bool runOnModule(Module &M) = 0; @@ -293,6 +303,9 @@ public:    explicit FunctionPass(intptr_t pid) : Pass(PT_Function, pid) {}    explicit FunctionPass(const void *pid) : Pass(PT_Function, pid) {} +  /// createPrinterPass - Get a function printer pass. +  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; +    /// doInitialization - Virtual method overridden by subclasses to do    /// any necessary per-module initialization.    /// @@ -343,6 +356,9 @@ public:    explicit BasicBlockPass(intptr_t pid) : Pass(PT_BasicBlock, pid) {}    explicit BasicBlockPass(const void *pid) : Pass(PT_BasicBlock, pid) {} +  /// createPrinterPass - Get a function printer pass. 
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; +    /// doInitialization - Virtual method overridden by subclasses to do    /// any necessary per-module initialization.    /// diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index bd381807e0c6..eb6c2d1e25a7 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -200,7 +200,7 @@ public:      while (Slab) {        char *End = Slab == Allocator.CurSlab ? Allocator.CurPtr :                                                (char *)Slab + Slab->Size; -      for (char *Ptr = (char*)Slab+1; Ptr < End; Ptr += sizeof(T)) { +      for (char *Ptr = (char*)(Slab+1); Ptr < End; Ptr += sizeof(T)) {          Ptr = Allocator.AlignPtr(Ptr, alignof<T>());  	if (Ptr + sizeof(T) <= End)            reinterpret_cast<T*>(Ptr)->~T(); diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h index ede1ed305450..ccc344612913 100644 --- a/include/llvm/Support/DebugLoc.h +++ b/include/llvm/Support/DebugLoc.h @@ -12,11 +12,8 @@  //   //===----------------------------------------------------------------------===// -#ifndef LLVM_DEBUGLOC_H -#define LLVM_DEBUGLOC_H - -#include "llvm/ADT/DenseMap.h" -#include <vector> +#ifndef LLVM_SUPPORT_DEBUGLOC_H +#define LLVM_SUPPORT_DEBUGLOC_H  namespace llvm {    class MDNode; @@ -25,7 +22,7 @@ namespace llvm {    /// DebugLoc - Debug location id.  This is carried by Instruction, SDNode,    /// and MachineInstr to compactly encode file/line/scope information for an    /// operation. -  class NewDebugLoc { +  class DebugLoc {      /// LineCol - This 32-bit value encodes the line and column number for the      /// location, encoded as 24-bits for line and 8 bits for col.  A value of 0      /// for either means unknown. @@ -35,15 +32,15 @@ namespace llvm {      /// decoded by LLVMContext.  0 is unknown.      int ScopeIdx;    public: -    NewDebugLoc() : LineCol(0), ScopeIdx(0) {}  // Defaults to unknown. +    DebugLoc() : LineCol(0), ScopeIdx(0) {}  // Defaults to unknown.      /// get - Get a new DebugLoc that corresponds to the specified line/col      /// scope/inline location. -    static NewDebugLoc get(unsigned Line, unsigned Col, -                           MDNode *Scope, MDNode *InlinedAt = 0); +    static DebugLoc get(unsigned Line, unsigned Col, +                        MDNode *Scope, MDNode *InlinedAt = 0); -    /// getFromDILocation - Translate the DILocation quad into a NewDebugLoc. -    static NewDebugLoc getFromDILocation(MDNode *N); +    /// getFromDILocation - Translate the DILocation quad into a DebugLoc. +    static DebugLoc getFromDILocation(MDNode *N);      /// isUnknown - Return true if this is an unknown location.      bool isUnknown() const { return ScopeIdx == 0; } @@ -73,48 +70,11 @@ namespace llvm {      /// DILocation compatible MDNode.      MDNode *getAsMDNode(const LLVMContext &Ctx) const; -    bool operator==(const NewDebugLoc &DL) const { +    bool operator==(const DebugLoc &DL) const {        return LineCol == DL.LineCol && ScopeIdx == DL.ScopeIdx;      } -    bool operator!=(const NewDebugLoc &DL) const { return !(*this == DL); } -  }; -   -   - -  /// DebugLoc - Debug location id. This is carried by SDNode and MachineInstr -  /// to index into a vector of unique debug location tuples. -  class DebugLoc { -    unsigned Idx; -  public: -    DebugLoc() : Idx(~0U) {}  // Defaults to invalid. 
- -    static DebugLoc getUnknownLoc()   { DebugLoc L; L.Idx = ~0U; return L; } -    static DebugLoc get(unsigned idx) { DebugLoc L; L.Idx = idx; return L; } - -    unsigned getIndex() const { return Idx; } - -    /// isUnknown - Return true if there is no debug info for the SDNode / -    /// MachineInstr. -    bool isUnknown() const { return Idx == ~0U; } - -    bool operator==(const DebugLoc &DL) const { return Idx == DL.Idx; }      bool operator!=(const DebugLoc &DL) const { return !(*this == DL); }    }; - -  /// DebugLocTracker - This class tracks debug location information. -  /// -  struct DebugLocTracker { -    /// DebugLocations - A vector of unique DebugLocTuple's. -    /// -    std::vector<MDNode *> DebugLocations; - -    /// DebugIdMap - This maps DebugLocTuple's to indices into the -    /// DebugLocations vector. -    DenseMap<MDNode *, unsigned> DebugIdMap; - -    DebugLocTracker() {} -  }; -    } // end namespace llvm  #endif /* LLVM_DEBUGLOC_H */ diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h index c352625aeb4d..faa8fa3aee2b 100644 --- a/include/llvm/Support/IRBuilder.h +++ b/include/llvm/Support/IRBuilder.h @@ -40,7 +40,7 @@ protected:  /// IRBuilderBase - Common base class shared among various IRBuilders.  class IRBuilderBase { -  NewDebugLoc CurDbgLocation; +  DebugLoc CurDbgLocation;  protected:    BasicBlock *BB;    BasicBlock::iterator InsertPt; @@ -82,13 +82,13 @@ public:    /// SetCurrentDebugLocation - Set location information used by debugging    /// information. -  void SetCurrentDebugLocation(const NewDebugLoc &L) { +  void SetCurrentDebugLocation(const DebugLoc &L) {      CurDbgLocation = L;    }    /// getCurrentDebugLocation - Get location information used by debugging    /// information. -  const NewDebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; } +  const DebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; }    /// SetInstDebugLocation - If this builder has a current debug location, set    /// it on the specified instruction. diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 9c5f32cd5ff2..f56241c5790b 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -457,6 +457,18 @@ inline int64_t abs64(int64_t x) {    return (x < 0) ? -x : x;  } +/// SignExtend32 - Sign extend B-bit number x to 32-bit int. +/// Usage int32_t r = SignExtend32<5>(x); +template <unsigned B> inline int32_t SignExtend32(int32_t x) { +  return (x << (32 - B)) >> (32 - B); +} + +/// SignExtend64 - Sign extend B-bit number x to 64-bit int. 
+/// Usage int64_t r = SignExtend64<5>(x); +template <unsigned B> inline int64_t SignExtend64(int32_t x) { +  return (x << (64 - B)) >> (64 - B); +} +  } // End llvm namespace  #endif diff --git a/include/llvm/Support/SlowOperationInformer.h b/include/llvm/Support/SlowOperationInformer.h index 524049cbff17..607d993fff7b 100644 --- a/include/llvm/Support/SlowOperationInformer.h +++ b/include/llvm/Support/SlowOperationInformer.h @@ -41,7 +41,7 @@ namespace llvm {      SlowOperationInformer(const SlowOperationInformer&);   // DO NOT IMPLEMENT      void operator=(const SlowOperationInformer&);          // DO NOT IMPLEMENT    public: -    SlowOperationInformer(const std::string &Name); +    explicit SlowOperationInformer(const std::string &Name);      ~SlowOperationInformer();      /// progress - Clients should periodically call this method when they can diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index b0534ddaa5e5..f040c9db38ca 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -633,15 +633,19 @@ public:    }    /// getOptimalMemOpType - Returns the target specific optimal type for load -  /// and store operations as a result of memset, memcpy, and memmove lowering. -  /// If DstAlign is zero that means it's safe to destination alignment can -  /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't -  /// a need to check it against alignment requirement, probably because the -  /// source does not need to be loaded. It returns EVT::Other if SelectionDAG -  /// should be responsible for determining it. +  /// and store operations as a result of memset, memcpy, and memmove +  /// lowering. If DstAlign is zero that means it's safe to destination +  /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +  /// means there isn't a need to check it against alignment requirement, +  /// probably because the source does not need to be loaded. If +  /// 'NonScalarIntSafe' is true, that means it's safe to return a +  /// non-scalar-integer type, e.g. empty string source, constant, or loaded +  /// from memory. It returns EVT::Other if SelectionDAG should be responsible +  /// for determining it.    virtual EVT getOptimalMemOpType(uint64_t Size,                                    unsigned DstAlign, unsigned SrcAlign, -                                  bool SafeToUseFP, SelectionDAG &DAG) const { +                                  bool NonScalarIntSafe, +                                  SelectionDAG &DAG) const {      return MVT::Other;    } diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h index b29b749e8d9a..927e156abfb5 100644 --- a/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/include/llvm/Transforms/Utils/SSAUpdater.h @@ -27,28 +27,22 @@ namespace llvm {  /// transformation wants to rewrite a set of uses of one value with uses of a  /// set of values.  class SSAUpdater { -public: -  class BBInfo; - -private:    /// AvailableVals - This keeps track of which value to use on a per-block -  /// basis.  When we insert PHI nodes, we keep track of them here. -  //typedef DenseMap<BasicBlock*, Value*> AvailableValsTy; +  /// basis.  When we insert PHI nodes, we keep track of them here.  We use +  /// TrackingVH's for the value of the map because we RAUW PHI nodes when we +  /// eliminate them, and want the TrackingVH's to track this. 
+  //typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;    void *AV;    /// PrototypeValue is an arbitrary representative value, which we derive names    /// and a type for PHI nodes.    Value *PrototypeValue; -  /// BBMap - The GetValueAtEndOfBlock method maintains this mapping from -  /// basic blocks to BBInfo structures. -  /// typedef DenseMap<BasicBlock*, BBInfo*> BBMapTy; -  void *BM; - -  /// Allocator - The GetValueAtEndOfBlock method uses this BumpPtrAllocator to -  /// hold its internal data.  The allocator and its storage is created and -  /// discarded for each invocation of GetValueAtEndOfBlock. -  void *BPA; +  /// IncomingPredInfo - We use this as scratch space when doing our recursive +  /// walk.  This should only be used in GetValueInBlockInternal, normally it +  /// should be empty. +  //std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > IncomingPredInfo; +  void *IPI;    /// InsertedPHIs - If this is non-null, the SSAUpdater adds all PHI nodes that    /// it creates to the vector. @@ -105,14 +99,6 @@ public:  private:    Value *GetValueAtEndOfBlockInternal(BasicBlock *BB); -  void FindPHIPlacement(BasicBlock *BB, BBInfo *Info, bool &Changed, -                        unsigned Counter); -  void FindAvailableVal(BasicBlock *BB, BBInfo *Info, unsigned Counter); -  void FindExistingPHI(BasicBlock *BB); -  bool CheckIfPHIMatches(PHINode *PHI); -  void RecordMatchingPHI(PHINode *PHI); -  void ClearPHITags(PHINode *PHI); -    void operator=(const SSAUpdater&); // DO NOT IMPLEMENT    SSAUpdater(const SSAUpdater&);     // DO NOT IMPLEMENT  }; diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index f12552d9e400..8ba19020b099 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -24,7 +24,6 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/Dwarf.h" -#include "llvm/Support/DebugLoc.h"  #include "llvm/Support/raw_ostream.h"  using namespace llvm;  using namespace llvm::dwarf; @@ -1147,16 +1146,31 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,  /// processModule - Process entire module and collect debug info.  
void DebugInfoFinder::processModule(Module &M) { -  unsigned MDDbgKind = M.getMDKindID("dbg"); -    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)      for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)        for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;             ++BI) { -        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) +        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {            processDeclare(DDI); -        else if (MDNode *L = BI->getMetadata(MDDbgKind))  -          processLocation(DILocation(L)); +          continue; +        } +         +        DebugLoc Loc = BI->getDebugLoc(); +        if (Loc.isUnknown()) +          continue; +         +        LLVMContext &Ctx = BI->getContext(); +        DIDescriptor Scope(Loc.getScope(Ctx)); +         +        if (Scope.isCompileUnit()) +          addCompileUnit(DICompileUnit(Scope.getNode())); +        else if (Scope.isSubprogram()) +          processSubprogram(DISubprogram(Scope.getNode())); +        else if (Scope.isLexicalBlock()) +          processLexicalBlock(DILexicalBlock(Scope.getNode())); +         +        if (MDNode *IA = Loc.getInlinedAt(Ctx)) +          processLocation(DILocation(IA));        }    NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); @@ -1372,23 +1386,6 @@ bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,    return true;  } -/// ExtractDebugLocation - Extract debug location information -/// from DILocation. -DebugLoc llvm::ExtractDebugLocation(DILocation &Loc, -                                    DebugLocTracker &DebugLocInfo) { -  DenseMap<MDNode *, unsigned>::iterator II -    = DebugLocInfo.DebugIdMap.find(Loc.getNode()); -  if (II != DebugLocInfo.DebugIdMap.end()) -    return DebugLoc::get(II->second); - -  // Add a new location entry. -  unsigned Id = DebugLocInfo.DebugLocations.size(); -  DebugLocInfo.DebugLocations.push_back(Loc.getNode()); -  DebugLocInfo.DebugIdMap[Loc.getNode()] = Id; - -  return DebugLoc::get(Id); -} -  /// getDISubprogram - Find subprogram that is enclosing this scope.  DISubprogram llvm::getDISubprogram(MDNode *Scope) {    DIDescriptor D(Scope); diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 0f39f44a0099..fb0804190ac1 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -87,10 +87,40 @@ private:                          bool IsCheckingMode);  }; +/// PrintCallGraphPass - Print a Module corresponding to a call graph. +/// +class PrintCallGraphPass : public CallGraphSCCPass { +private: +  std::string Banner; +  raw_ostream &Out;       // raw_ostream to print on. + +public: +  static char ID; +  PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {} +  PrintCallGraphPass(const std::string &B, raw_ostream &o) +      : CallGraphSCCPass(&ID), Banner(B), Out(o) {} + +  virtual void getAnalysisUsage(AnalysisUsage &AU) const { +    AU.setPreservesAll(); +  } + +  bool runOnSCC(std::vector<CallGraphNode *> &SCC) { +    Out << Banner; +    for (std::vector<CallGraphNode *>::iterator n = SCC.begin(), ne = SCC.end(); +         n != ne; +         ++n) { +      (*n)->getFunction()->print(Out); +    } +    return false; +  } +}; +  } // end anonymous namespace.  
char CGPassManager::ID = 0; +char PrintCallGraphPass::ID = 0; +  bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,                                   CallGraph &CG, bool &CallGraphUpToDate) {    bool Changed = false; @@ -396,6 +426,11 @@ bool CGPassManager::doFinalization(CallGraph &CG) {    return Changed;  } +Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, +                                          const std::string &Banner) const { +  return new PrintCallGraphPass(Banner, O); +} +  /// Assign pass manager to manage this pass.  void CallGraphSCCPass::assignPassManager(PMStack &PMS,                                           PassManagerType PreferredType) { diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index e2d2c2bc9d88..2727d2f9465c 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,9 +14,44 @@  //===----------------------------------------------------------------------===//  #include "llvm/Analysis/LoopPass.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/Debug.h"  #include "llvm/Support/Timer.h"  using namespace llvm; +namespace { + +/// PrintLoopPass - Print a Function corresponding to a Loop. +/// +class PrintLoopPass : public LoopPass { +private: +  std::string Banner; +  raw_ostream &Out;       // raw_ostream to print on. + +public: +  static char ID; +  PrintLoopPass() : LoopPass(&ID), Out(dbgs()) {} +  PrintLoopPass(const std::string &B, raw_ostream &o) +      : LoopPass(&ID), Banner(B), Out(o) {} + +  virtual void getAnalysisUsage(AnalysisUsage &AU) const { +    AU.setPreservesAll(); +  } + +  bool runOnLoop(Loop *L, LPPassManager &) { +    Out << Banner; +    for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); +         b != be; +         ++b) { +      (*b)->print(Out); +    } +    return false; +  } +}; + +char PrintLoopPass::ID = 0; +} +  //===----------------------------------------------------------------------===//  // LPPassManager  // @@ -306,6 +341,11 @@ void LPPassManager::dumpPassStructure(unsigned Offset) {  //===----------------------------------------------------------------------===//  // LoopPass +Pass *LoopPass::createPrinterPass(raw_ostream &O, +                                  const std::string &Banner) const { +  return new PrintLoopPass(Banner, O); +} +  // Check if this pass is suitable for the current LPPassManager, if  // available. This pass P is not suitable for a LPPassManager if P  // is not preserving higher level analysis info used by other diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 76d112e045a3..69adead4ba8e 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1644,6 +1644,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {    BasicBlock *CurBB = 0;    unsigned CurBBNo = 0; +  DebugLoc LastLoc; +      // Read all the records.    SmallVector<uint64_t, 64> Record;    while (1) { @@ -1699,6 +1701,46 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {        CurBB = FunctionBBs[0];        continue; +         +    case bitc::FUNC_CODE_DEBUG_LOC_AGAIN:  // DEBUG_LOC_AGAIN +      // This record indicates that the last instruction is at the same +      // location as the previous instruction with a location. +      I = 0; +         +      // Get the last instruction emitted. 
+      if (CurBB && !CurBB->empty()) +        I = &CurBB->back(); +      else if (CurBBNo && FunctionBBs[CurBBNo-1] && +               !FunctionBBs[CurBBNo-1]->empty()) +        I = &FunctionBBs[CurBBNo-1]->back(); +         +      if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record"); +      I->setDebugLoc(LastLoc); +      I = 0; +      continue; +         +    case bitc::FUNC_CODE_DEBUG_LOC: {      // DEBUG_LOC: [line, col, scope, ia] +      I = 0;     // Get the last instruction emitted. +      if (CurBB && !CurBB->empty()) +        I = &CurBB->back(); +      else if (CurBBNo && FunctionBBs[CurBBNo-1] && +               !FunctionBBs[CurBBNo-1]->empty()) +        I = &FunctionBBs[CurBBNo-1]->back(); +      if (I == 0 || Record.size() < 4) +        return Error("Invalid FUNC_CODE_DEBUG_LOC record"); +       +      unsigned Line = Record[0], Col = Record[1]; +      unsigned ScopeID = Record[2], IAID = Record[3]; +       +      MDNode *Scope = 0, *IA = 0; +      if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1)); +      if (IAID)    IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1)); +      LastLoc = DebugLoc::get(Line, Col, Scope, IA); +      I->setDebugLoc(LastLoc); +      I = 0; +      continue; +    } +      case bitc::FUNC_CODE_INST_BINOP: {    // BINOP: [opval, ty, opval, opcode]        unsigned OpNum = 0;        Value *LHS, *RHS; @@ -2285,8 +2327,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {    // See if anything took the address of blocks in this function.  If so,    // resolve them now. -  /// BlockAddrFwdRefs - These are blockaddr references to basic blocks.  These -  /// are resolved lazily when functions are loaded.    DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =      BlockAddrFwdRefs.find(F);    if (BAFRI != BlockAddrFwdRefs.end()) { diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 1f69e1685166..9bda6dca3d5e 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -596,7 +596,8 @@ static void WriteFunctionLocalMetadata(const Function &F,  static void WriteMetadataAttachment(const Function &F,                                      const ValueEnumerator &VE,                                      BitstreamWriter &Stream) { -  bool StartedMetadataBlock = false; +  Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3); +    SmallVector<uint64_t, 64> Record;    // Write metadata attachments @@ -607,7 +608,7 @@ static void WriteMetadataAttachment(const Function &F,      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();           I != E; ++I) {        MDs.clear(); -      I->getAllMetadata(MDs); +      I->getAllMetadataOtherThanDebugLoc(MDs);        // If no metadata, ignore instruction.        
if (MDs.empty()) continue; @@ -618,16 +619,11 @@ static void WriteMetadataAttachment(const Function &F,          Record.push_back(MDs[i].first);          Record.push_back(VE.getValueID(MDs[i].second));        } -      if (!StartedMetadataBlock)  { -        Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3); -        StartedMetadataBlock = true; -      }        Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);        Record.clear();      } -  if (StartedMetadataBlock) -    Stream.ExitBlock(); +  Stream.ExitBlock();  }  static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { @@ -1256,19 +1252,49 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,    // Keep a running idea of what the instruction ID is.    unsigned InstID = CstEnd; +  bool NeedsMetadataAttachment = false; +   +  DebugLoc LastDL; +      // Finally, emit all the instructions, in order.    for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();           I != E; ++I) {        WriteInstruction(*I, InstID, VE, Stream, Vals); +              if (!I->getType()->isVoidTy())          ++InstID; +       +      // If the instruction has metadata, write a metadata attachment later. +      NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); +       +      // If the instruction has a debug location, emit it. +      DebugLoc DL = I->getDebugLoc(); +      if (DL.isUnknown()) { +        // nothing todo. +      } else if (DL == LastDL) { +        // Just repeat the same debug loc as last time. +        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); +      } else { +        MDNode *Scope, *IA; +        DL.getScopeAndInlinedAt(Scope, IA, I->getContext()); +         +        Vals.push_back(DL.getLine()); +        Vals.push_back(DL.getCol()); +        Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0); +        Vals.push_back(IA ? VE.getValueID(IA)+1 : 0); +        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); +        Vals.clear(); +         +        LastDL = DL; +      }      }    // Emit names for all the instructions etc.    WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream); -  WriteMetadataAttachment(F, VE, Stream); +  if (NeedsMetadataAttachment) +    WriteMetadataAttachment(F, VE, Stream);    VE.purgeFunction();    Stream.ExitBlock();  } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index aa4c3afab40e..d2baec770503 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -104,9 +104,16 @@ ValueEnumerator::ValueEnumerator(const Module *M) {          // Enumerate metadata attached with this instruction.          
MDs.clear(); -        I->getAllMetadata(MDs); +        I->getAllMetadataOtherThanDebugLoc(MDs);          for (unsigned i = 0, e = MDs.size(); i != e; ++i)            EnumerateMetadata(MDs[i].second); +         +        if (!I->getDebugLoc().isUnknown()) { +          MDNode *Scope, *IA; +          I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext()); +          if (Scope) EnumerateMetadata(Scope); +          if (IA) EnumerateMetadata(IA); +        }        }    } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3e71d18b5261..625a2b95f205 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -340,19 +340,17 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {    const MachineFunction *MF = MI.getParent()->getParent();    const TargetMachine &TM = MF->getTarget(); -  if (!MI.getDebugLoc().isUnknown()) { -    DILocation DLT = MF->getDILocation(MI.getDebugLoc()); -     -    // Print source line info. -    DIScope Scope = DLT.getScope(); +  DebugLoc DL = MI.getDebugLoc(); +  if (!DL.isUnknown()) {          // Print source line info. +    DIScope Scope(DL.getScope(MF->getFunction()->getContext()));      // Omit the directory, because it's likely to be long and uninteresting.      if (Scope.Verify())        CommentOS << Scope.getFilename();      else        CommentOS << "<unknown>"; -    CommentOS << ':' << DLT.getLineNumber(); -    if (DLT.getColumnNumber() != 0) -      CommentOS << ':' << DLT.getColumnNumber(); +    CommentOS << ':' << DL.getLine(); +    if (DL.getCol() != 0) +      CommentOS << ':' << DL.getCol();      CommentOS << '\n';    } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index fb91d4f9849f..9084456c8f4d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -302,7 +302,7 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)    : DwarfPrinter(OS, A, T), ModuleCU(0),      AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),      DIEBlocks(), SectionSourceLines(), didInitial(false), shouldEmit(false), -    CurrentFnDbgScope(0), PrevDILoc(0), DebugTimer(0) { +    CurrentFnDbgScope(0), DebugTimer(0) {    NextStringPoolNumber = 0;    if (TimePassesIsEnabled)      DebugTimer = new Timer("Dwarf Debug Writer"); @@ -1932,13 +1932,14 @@ void DwarfDebug::endModule() {  /// findAbstractVariable - Find abstract variable, if any, associated with Var.  DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,                                                unsigned FrameIdx, -                                              DILocation &ScopeLoc) { +                                              DebugLoc ScopeLoc) {    DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());    if (AbsDbgVariable)      return AbsDbgVariable; -  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode()); +  LLVMContext &Ctx = Var.getNode()->getContext(); +  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));    if (!Scope)      return NULL; @@ -1953,13 +1954,14 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,  /// FIXME : Refactor findAbstractVariable.  
DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,                                                const MachineInstr *MI, -                                              DILocation &ScopeLoc) { +                                              DebugLoc ScopeLoc) {    DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());    if (AbsDbgVariable)      return AbsDbgVariable; -  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode()); +  LLVMContext &Ctx = Var.getNode()->getContext(); +  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));    if (!Scope)      return NULL; @@ -1975,24 +1977,27 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,  void DwarfDebug::collectVariableInfo() {    if (!MMI) return; +  const LLVMContext &Ctx = MF->getFunction()->getContext(); +    MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();    for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),           VE = VMap.end(); VI != VE; ++VI) {      MDNode *Var = VI->first;      if (!Var) continue; -    DIVariable DV (Var); -    std::pair< unsigned, MDNode *> VP = VI->second; -    DILocation ScopeLoc(VP.second); - -    DbgScope *Scope = -      ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode()); -    if (!Scope) -      Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode()); +    DIVariable DV(Var); +    const std::pair<unsigned, DebugLoc> &VP = VI->second; + +    DbgScope *Scope = 0; +    if (MDNode *IA = VP.second.getInlinedAt(Ctx)) +      Scope = ConcreteScopes.lookup(IA); +    if (Scope == 0) +      Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx)); +          // If variable scope is not found then skip this variable. -    if (!Scope) +    if (Scope == 0)        continue; -    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc); +    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, VP.second);      DbgVariable *RegVar = new DbgVariable(DV, VP.first, AbsDbgVariable);      Scope->addVariable(RegVar);    } @@ -2021,16 +2026,17 @@ void DwarfDebug::collectVariableInfo() {        DebugLoc DL = MInsn->getDebugLoc();        if (DL.isUnknown()) continue; -      DILocation ScopeLoc = MF->getDILocation(DL); -      DbgScope *Scope = -        ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode()); -      if (!Scope) -        Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode()); +      DbgScope *Scope = 0; +      if (MDNode *IA = DL.getInlinedAt(Ctx)) +        Scope = ConcreteScopes.lookup(IA); +      if (Scope == 0) +        Scope = DbgScopeMap.lookup(DL.getScope(Ctx)); +              // If variable scope is not found then skip this variable. -      if (!Scope) +      if (Scope == 0)          continue; -      DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, ScopeLoc); +      DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL);        DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable);        DbgValueStartMap[MInsn] = RegVar;        Scope->addVariable(RegVar); @@ -2044,12 +2050,15 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {    DebugLoc DL = MI->getDebugLoc();    if (DL.isUnknown())      return; -  DILocation DILoc = MF->getDILocation(DL); -  if (!DILoc.getScope().Verify()) -    return;    // Check and update last known location info. 
-  if(DILoc.getNode() == PrevDILoc) +  if (DL == PrevInstLoc) +    return; +   +  MDNode *Scope = DL.getScope(MF->getFunction()->getContext()); +   +  // FIXME: Should only verify each scope once! +  if (!DIScope(Scope).Verify())      return;    // DBG_VALUE instruction establishes new value. @@ -2057,10 +2066,8 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {      DenseMap<const MachineInstr *, DbgVariable *>::iterator DI        = DbgValueStartMap.find(MI);      if (DI != DbgValueStartMap.end()) { -      MCSymbol *Label = recordSourceLine(DILoc.getLineNumber(), -                                         DILoc.getColumnNumber(), -                                         DILoc.getScope().getNode()); -      PrevDILoc = DILoc.getNode(); +      MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); +      PrevInstLoc = DL;        DI->second->setDbgValueLabel(Label);      }      return; @@ -2068,10 +2075,8 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {    // Emit a label to indicate location change. This is used for line     // table even if this instruction does start a new scope. -  MCSymbol *Label = recordSourceLine(DILoc.getLineNumber(), -                                     DILoc.getColumnNumber(), -                                     DILoc.getScope().getNode()); -  PrevDILoc = DILoc.getNode(); +  MCSymbol *Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope); +  PrevInstLoc = DL;    // update DbgScope if this instruction starts a new scope.    InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); @@ -2094,15 +2099,12 @@ void DwarfDebug::endScope(const MachineInstr *MI) {    DebugLoc DL = MI->getDebugLoc();    if (DL.isUnknown())      return; -  DILocation DILoc = MF->getDILocation(DL); -  if (!DILoc.getScope().Verify()) -    return; -   +    // Emit a label and update DbgScope if this instruction ends a scope.    InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);    if (I == DbgScopeEndMap.end())      return; - +      MCSymbol *Label = MMI->getContext().CreateTempSymbol();    Asm->OutStreamer.EmitLabel(Label); @@ -2115,7 +2117,6 @@ void DwarfDebug::endScope(const MachineInstr *MI) {  /// createDbgScope - Create DbgScope for the scope.  void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { -    if (!InlinedAt) {      DbgScope *WScope = DbgScopeMap.lookup(Scope);      if (WScope) @@ -2147,6 +2148,8 @@ bool DwarfDebug::extractScopeInformation() {    DenseMap<const MachineInstr *, unsigned> MIIndexMap;    unsigned MIIndex = 0; +  LLVMContext &Ctx = MF->getFunction()->getContext(); +      // Scan each instruction and create scopes. First build working set of scopes.    for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();         I != E; ++I) { @@ -2156,16 +2159,17 @@ bool DwarfDebug::extractScopeInformation() {        // FIXME : Remove DBG_VALUE check.        if (MInsn->isDebugValue()) continue;        MIIndexMap[MInsn] = MIIndex++; +              DebugLoc DL = MInsn->getDebugLoc();        if (DL.isUnknown()) continue; -      DILocation DLT = MF->getDILocation(DL); -      DIScope DLTScope = DLT.getScope(); -      if (!DLTScope.getNode()) continue; +       +      MDNode *Scope = DL.getScope(Ctx); +              // There is no need to create another DIE for compile unit. For all        // other scopes, create one DbgScope now. This will be translated        // into a scope DIE at the end. 
-      if (DLTScope.isCompileUnit()) continue; -      createDbgScope(DLTScope.getNode(), DLT.getOrigLocation().getNode()); +      if (DIScope(Scope).isCompileUnit()) continue; +      createDbgScope(Scope, DL.getInlinedAt(Ctx));      }    } @@ -2179,17 +2183,17 @@ bool DwarfDebug::extractScopeInformation() {        // FIXME : Remove DBG_VALUE check.        if (MInsn->isDebugValue()) continue;        DebugLoc DL = MInsn->getDebugLoc(); -      if (DL.isUnknown())  continue; -      DILocation DLT = MF->getDILocation(DL); -      DIScope DLTScope = DLT.getScope(); -      if (!DLTScope.getNode()) continue; +      if (DL.isUnknown()) continue; + +      MDNode *Scope = DL.getScope(Ctx); +      if (Scope == 0) continue; +              // There is no need to create another DIE for compile unit. For all        // other scopes, create one DbgScope now. This will be translated        // into a scope DIE at the end. -      if (DLTScope.isCompileUnit()) continue; -      DbgScope *Scope = getUpdatedDbgScope(DLTScope.getNode(), MInsn,  -                                           DLT.getOrigLocation().getNode()); -      Scope->setLastInsn(MInsn); +      if (DIScope(Scope).isCompileUnit()) continue; +      DbgScope *DScope = getUpdatedDbgScope(Scope, MInsn, DL.getInlinedAt(Ctx)); +      DScope->setLastInsn(MInsn);      }    } @@ -2255,20 +2259,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {    // Emit label for the implicitly defined dbg.stoppoint at the start of the    // function.    DebugLoc FDL = MF->getDefaultDebugLoc(); -  if (!FDL.isUnknown()) { -    DILocation DLT = MF->getDILocation(FDL); -    DISubprogram SP = getDISubprogram(DLT.getScope().getNode()); -    unsigned Line, Col; -    if (SP.Verify()) { -      Line = SP.getLineNumber(); -      Col = 0; -    } else { -      Line = DLT.getLineNumber(); -      Col = DLT.getColumnNumber(); -    } -     -    recordSourceLine(Line, Col, DLT.getScope().getNode()); +  if (FDL.isUnknown()) return; +   +  MDNode *Scope = FDL.getScope(MF->getFunction()->getContext()); +   +  DISubprogram SP = getDISubprogram(Scope); +  unsigned Line, Col; +  if (SP.Verify()) { +    Line = SP.getLineNumber(); +    Col = 0; +  } else { +    Line = FDL.getLine(); +    Col = FDL.getCol();    } +   +  recordSourceLine(Line, Col, Scope);  }  /// endFunction - Gather and emit post-function debug information. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index ad6b0c2cb7cd..03d9d9935f2f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -195,7 +195,7 @@ class DwarfDebug : public DwarfPrinter {    /// Previous instruction's location information. This is used to determine    /// label location to indicate scope boundries in dwarf debug info. -  mutable const MDNode *PrevDILoc; +  DebugLoc PrevInstLoc;    /// DebugTimer - Timer for the Dwarf debug writer.    Timer *DebugTimer; @@ -361,7 +361,8 @@ class DwarfDebug : public DwarfPrinter {    /// getUpdatedDbgScope - Find or create DbgScope assicated with     /// the instruction. Initialize scope and update scope hierarchy. -  DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt); +  DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, +                               MDNode *InlinedAt);    /// createDbgScope - Create DbgScope for the scope.    
void createDbgScope(MDNode *Scope, MDNode *InlinedAt); @@ -370,9 +371,9 @@ class DwarfDebug : public DwarfPrinter {    /// findAbstractVariable - Find abstract variable associated with Var.    DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx,  -                                    DILocation &Loc); +                                    DebugLoc Loc);    DbgVariable *findAbstractVariable(DIVariable &Var, const MachineInstr *MI, -                                    DILocation &Loc); +                                    DebugLoc Loc);    /// updateSubprogramScopeDIE - Find DIE for the given subprogram and     /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d385b860d86e..62d18836c93e 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_library(LLVMCodeGen    MachineFunction.cpp    MachineFunctionAnalysis.cpp    MachineFunctionPass.cpp +  MachineFunctionPrinterPass.cpp    MachineInstr.cpp    MachineLICM.cpp    MachineLoopInfo.cpp diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 8bae9edde721..7dbfd7d168bd 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -661,7 +661,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() {  /// the start of the basic block (unless there already is one, in which case  /// the existing call is returned).  Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { -  Instruction *Start = BB->getFirstNonPHI(); +  Instruction *Start = BB->getFirstNonPHIOrDbg();    // Is this a call to eh.exception?    if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))      if (CI->getIntrinsicID() == Intrinsic::eh_exception) @@ -681,7 +681,7 @@ Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {  /// (creating it if necessary) at the start of the basic block (unless  /// there already is a load, in which case the existing load is returned).  Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { -  Instruction *Start = BB->getFirstNonPHI(); +  Instruction *Start = BB->getFirstNonPHIOrDbg();    // Is this a load of the exception temporary?    if (ExceptionValueVar)      if (LoadInst* LI = dyn_cast<LoadInst>(Start)) diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index f6cc71f3a437..beac0c630bb5 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -39,40 +39,6 @@  #include "llvm/Support/raw_ostream.h"  using namespace llvm; -namespace { -  struct Printer : public MachineFunctionPass { -    static char ID; - -    raw_ostream &OS; -    const std::string Banner; - -    Printer(raw_ostream &os, const std::string &banner)  -      : MachineFunctionPass(&ID), OS(os), Banner(banner) {} - -    const char *getPassName() const { return "MachineFunction Printer"; } - -    virtual void getAnalysisUsage(AnalysisUsage &AU) const { -      AU.setPreservesAll(); -      MachineFunctionPass::getAnalysisUsage(AU); -    } - -    bool runOnMachineFunction(MachineFunction &MF) { -      OS << "# " << Banner << ":\n"; -      MF.print(OS); -      return false; -    } -  }; -  char Printer::ID = 0; -} - -/// Returns a newly-created MachineFunction Printer pass. The default banner is -/// empty. 
-/// -FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS, -                                                     const std::string &Banner){ -  return new Printer(OS, Banner); -} -  //===----------------------------------------------------------------------===//  // MachineFunction implementation  //===----------------------------------------------------------------------===// @@ -436,15 +402,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,    return VReg;  } -/// getDILocation - Get the DILocation for a given DebugLoc object. -DILocation MachineFunction::getDILocation(DebugLoc DL) const { -  unsigned Idx = DL.getIndex(); -  assert(Idx < DebugLocInfo.DebugLocations.size() && -         "Invalid index into debug locations!"); -  return DILocation(DebugLocInfo.DebugLocations[Idx]); -} - -  /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.  /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a  /// normal 'L' label is returned. diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index 2f8d4c9e7aa4..e5a491270a8c 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -15,8 +15,14 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/CodeGen/MachineFunctionAnalysis.h"  #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h"  using namespace llvm; +Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, +                                             const std::string &Banner) const { +  return createMachineFunctionPrinterPass(O, Banner); +} +  bool MachineFunctionPass::runOnFunction(Function &F) {    // Do not codegen any 'available_externally' functions at all, they have    // definitions outside the translation unit. diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp new file mode 100644 index 000000000000..547c4febc8da --- /dev/null +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -0,0 +1,60 @@ +//===-- MachineFunctionPrinterPass.cpp ------------------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineFunctionPrinterPass implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +/// MachineFunctionPrinterPass - This is a pass to dump the IR of a +/// MachineFunction. 
+/// +struct MachineFunctionPrinterPass : public MachineFunctionPass { +  static char ID; + +  raw_ostream &OS; +  const std::string Banner; + +  MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)  +      : MachineFunctionPass(&ID), OS(os), Banner(banner) {} + +  const char *getPassName() const { return "MachineFunction Printer"; } + +  virtual void getAnalysisUsage(AnalysisUsage &AU) const { +    AU.setPreservesAll(); +    MachineFunctionPass::getAnalysisUsage(AU); +  } + +  bool runOnMachineFunction(MachineFunction &MF) { +    OS << "# " << Banner << ":\n"; +    MF.print(OS); +    return false; +  } +}; + +char MachineFunctionPrinterPass::ID = 0; +} + +namespace llvm { +/// Returns a newly-created MachineFunction Printer pass. The +/// default banner is empty. +/// +MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, +                                                      const std::string &Banner){ +  return new MachineFunctionPrinterPass(OS, Banner); +} + +} diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 40d6b2093be7..39b7fb507f8b 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -395,7 +395,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {  /// TID NULL and no operands.  MachineInstr::MachineInstr()    : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), -    Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { +    Parent(0) {    // Make sure that we get added to a machine basicblock    LeakDetector::addGarbageObject(this);  } @@ -415,8 +415,7 @@ void MachineInstr::addImplicitDefUseOperands() {  /// instructions with variable number of operands).  MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)    : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), -    MemRefs(0), MemRefsEnd(0), Parent(0), -    debugLoc(DebugLoc::getUnknownLoc()) { +    MemRefs(0), MemRefsEnd(0), Parent(0) {    if (!NoImp && TID->getImplicitDefs())      for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)        NumImplicitOps++; @@ -454,8 +453,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,  ///  MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)    : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), -    MemRefs(0), MemRefsEnd(0), Parent(0),  -    debugLoc(DebugLoc::getUnknownLoc()) { +    MemRefs(0), MemRefsEnd(0), Parent(0) {    assert(MBB && "Cannot use inserting ctor with null basic block!");    if (TID->ImplicitDefs)      for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -1221,17 +1219,16 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {      // TODO: print InlinedAtLoc information -    DILocation DLT = MF->getDILocation(debugLoc); -    DIScope Scope = DLT.getScope(); +    DIScope Scope(debugLoc.getScope(MF->getFunction()->getContext()));      OS << " dbg:";      // Omit the directory, since it's usually long and uninteresting.      
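
createMachineFunctionPrinterPass keeps its old signature, so it can still be dropped into a codegen pipeline to dump each MachineFunction, and the new createPrinterPass override lets the generic pass manager insert it automatically when printing is requested. A hedged usage sketch; the pass manager being filled in and the banner text are assumed context, not taken from this patch:

#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"

// PM is assumed to be the PassManagerBase a target machine is populating
// while it sets up code generation.
static void addMachineCodeDump(llvm::PassManagerBase &PM) {
  PM.add(llvm::createMachineFunctionPrinterPass(llvm::errs(),
                                                "After instruction selection"));
}
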
if (Scope.Verify())        OS << Scope.getFilename();      else        OS << "<unknown>"; -    OS << ':' << DLT.getLineNumber(); -    if (DLT.getColumnNumber() != 0) -      OS << ':' << DLT.getColumnNumber(); +    OS << ':' << debugLoc.getLine(); +    if (debugLoc.getCol() != 0) +      OS << ':' << debugLoc.getCol();    }    OS << "\n"; diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 2255dc339657..b79cdbb660de 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -125,7 +125,7 @@ MachineInstr *InsertNewDef(unsigned Opcode,                             const TargetRegisterClass *RC,                             MachineRegisterInfo *MRI, const TargetInstrInfo *TII) {    unsigned NewVR = MRI->createVirtualRegister(RC); -  return BuildMI(*BB, I, DebugLoc::getUnknownLoc(), TII->get(Opcode), NewVR); +  return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);  }  /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e4e9ef405f63..d6f8a205c1f6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -340,8 +340,8 @@ bool FastISel::SelectCall(User *I) {        StaticAllocaMap.find(AI);      if (SI == StaticAllocaMap.end()) break; // VLAs.      int FI = SI->second; -    if (MDNode *Dbg = DI->getDbgMetadata()) -      MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg); +    if (!DI->getDebugLoc().isUnknown()) +      MMI->setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc());      // Building the map above is target independent.  Generating DBG_VALUE      // inline is target dependent; do this now. diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 7638ea2ae162..9d1568f01f40 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -16,6 +16,7 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/Support/DebugLoc.h" +#include "llvm/System/DataTypes.h"  namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0ba65ab7f96d..3643ea7c5315 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -794,8 +794,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {  // EntryNode could meaningfully have debug info if we can find it...  
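
A convention this patch relies on everywhere: a default-constructed DebugLoc() is the unknown location, which is why the explicit DebugLoc::getUnknownLoc() calls and constructor initializers can simply be dropped. A simplified stand-in (not the real llvm::DebugLoc) showing just that convention:

#include <cassert>

struct FakeDebugLoc {
  const void *ScopeAndInlinedAt;            // null stands for "no location"
  FakeDebugLoc() : ScopeAndInlinedAt(0) {}  // default-constructed == unknown
  bool isUnknown() const { return ScopeAndInlinedAt == 0; }
};

int main() {
  FakeDebugLoc DL;          // plays the role of `DebugLoc DL;` in the patch
  assert(DL.isUnknown());   // no explicit getUnknownLoc() needed any more
  return 0;
}
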
SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)    : TLI(tli), FLI(fli), DW(0), -    EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(), -              getVTList(MVT::Other)), +    EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),      Root(getEntryNode()), Ordering(0) {    AllNodes.push_back(&EntryNode);    Ordering = new SDNodeOrdering(); @@ -919,8 +918,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {    if (VT.isVector()) {      SmallVector<SDValue, 8> Ops;      Ops.assign(VT.getVectorNumElements(), Result); -    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), -                     VT, &Ops[0], Ops.size()); +    Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());    }    return Result;  } @@ -963,8 +961,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){      SmallVector<SDValue, 8> Ops;      Ops.assign(VT.getVectorNumElements(), Result);      // FIXME DebugLoc info might be appropriate here -    Result = getNode(ISD::BUILD_VECTOR, DebugLoc::getUnknownLoc(), -                     VT, &Ops[0], Ops.size()); +    Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size());    }    return Result;  } @@ -3094,6 +3091,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {  /// operand.  static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,                                DebugLoc dl) { +  assert(Value.getOpcode() != ISD::UNDEF); +    unsigned NumBits = VT.getScalarType().getSizeInBits();    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {      APInt Val = APInt(NumBits, C->getZExtValue() & 255); @@ -3197,7 +3196,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {  static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,                                       unsigned Limit, uint64_t Size,                                       unsigned DstAlign, unsigned SrcAlign, -                                     bool SafeToUseFP, +                                     bool NonScalarIntSafe,                                       SelectionDAG &DAG,                                       const TargetLowering &TLI) {    assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3207,7 +3206,8 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,    // the inferred alignment of the source. 'DstAlign', on the other hand, is the    // specified alignment of the memory operation. If it is zero, that means    // it's possible to change the alignment of the destination. -  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, SafeToUseFP, DAG); +  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, +                                   NonScalarIntSafe, DAG);    if (VT == MVT::Other) {      VT = TLI.getPointerTy(); @@ -3266,10 +3266,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,                                         unsigned Align, bool AlwaysInline,                                         const Value *DstSV, uint64_t DstSVOff,                                         const Value *SrcSV, uint64_t SrcSVOff) { -  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  // Turn a memcpy of undef to nop. +  if (Src.getOpcode() == ISD::UNDEF) +    return Chain;    // Expand memcpy to a series of load and store ops if the size operand falls    // below a certain threshold. 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();    std::vector<EVT> MemOps;    uint64_t Limit = -1ULL;    if (!AlwaysInline) @@ -3352,10 +3355,13 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,                                          unsigned Align,bool AlwaysInline,                                          const Value *DstSV, uint64_t DstSVOff,                                          const Value *SrcSV, uint64_t SrcSVOff) { -  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  // Turn a memmove of undef to nop. +  if (Src.getOpcode() == ISD::UNDEF) +    return Chain;    // Expand memmove to a series of load and store ops if the size operand falls    // below a certain threshold. +  const TargetLowering &TLI = DAG.getTargetLoweringInfo();    std::vector<EVT> MemOps;    uint64_t Limit = -1ULL;    if (!AlwaysInline) @@ -3426,21 +3432,24 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,                                 SDValue Src, uint64_t Size,                                 unsigned Align,                                 const Value *DstSV, uint64_t DstSVOff) { -  const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +  // Turn a memset of undef to nop. +  if (Src.getOpcode() == ISD::UNDEF) +    return Chain;    // Expand memset to a series of load/store ops if the size operand    // falls below a certain threshold. +  const TargetLowering &TLI = DAG.getTargetLoweringInfo();    std::vector<EVT> MemOps;    bool DstAlignCanChange = false;    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);    if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))      DstAlignCanChange = true; -  bool IsZero = isa<ConstantSDNode>(Src) && -    cast<ConstantSDNode>(Src)->isNullValue(); +  bool NonScalarIntSafe = +    isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();    if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),                                  Size, (DstAlignCanChange ? 
0 : Align), 0, -                                IsZero, DAG, TLI)) +                                NonScalarIntSafe, DAG, TLI))      return SDValue();    if (DstAlignCanChange) { @@ -3592,9 +3601,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,      if (ConstantSize->isNullValue())        return Chain; -    SDValue Result = -      getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), -                      Align, DstSV, DstSVOff); +    SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, +                                     ConstantSize->getZExtValue(), +                                     Align, DstSV, DstSVOff);      if (Result.getNode())        return Result;    } @@ -5323,8 +5332,7 @@ HandleSDNode::~HandleSDNode() {  GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,                                           EVT VT, int64_t o, unsigned char TF) -  : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)), -    Offset(o), TargetFlags(TF) { +  : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) {    TheGlobal = const_cast<GlobalValue*>(GA);  } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 922c6e8b02a9..879bdb2cbbb7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -546,7 +546,7 @@ void SelectionDAGBuilder::clear() {    PendingExports.clear();    EdgeMapping.clear();    DAG.clear(); -  CurDebugLoc = DebugLoc::getUnknownLoc(); +  CurDebugLoc = DebugLoc();    HasTailCall = false;  } @@ -3800,8 +3800,8 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {      int FI = SI->second;      if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) -      if (MDNode *Dbg = DI.getDbgMetadata()) -        MMI->setVariableDbgInfo(Variable, FI, Dbg); +      if (!DI.getDebugLoc().isUnknown()) +        MMI->setVariableDbgInfo(Variable, FI, DI.getDebugLoc());      return 0;    }    case Intrinsic::dbg_value: { @@ -3851,9 +3851,10 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {      if (SI == FuncInfo.StaticAllocaMap.end())        return 0; // VLAs.      
int FI = SI->second; +          if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) -      if (MDNode *Dbg = DI.getDbgMetadata()) -        MMI->setVariableDbgInfo(Variable, FI, Dbg); +      if (!DI.getDebugLoc().isUnknown()) +        MMI->setVariableDbgInfo(Variable, FI, DI.getDebugLoc());      return 0;    }    case Intrinsic::eh_exception: { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bc4b33dff12f..9f027729b75e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -306,10 +306,8 @@ public:    SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli,                        FunctionLoweringInfo &funcinfo,                        CodeGenOpt::Level ol) -    : CurDebugLoc(DebugLoc::getUnknownLoc()), SDNodeOrder(0), -      TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), -      HasTailCall(false), -      Context(dag.getContext()) { +    : SDNodeOrder(0), TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), +      HasTailCall(false), Context(dag.getContext()) {    }    void init(GCFunctionInfo *gfi, AliasAnalysis &aa); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ea96b2179999..d54566b8cd3a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -368,28 +368,25 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {  /// attached with this instruction.  static void SetDebugLoc(Instruction *I, SelectionDAGBuilder *SDB,                          FastISel *FastIS, MachineFunction *MF) { -  MDNode *Dbg = I->getDbgMetadata(); -  if (Dbg == 0) return; +  DebugLoc DL = I->getDebugLoc(); +  if (DL.isUnknown()) return; -  DILocation DILoc(Dbg); -  DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); - -  SDB->setCurDebugLoc(Loc); +  SDB->setCurDebugLoc(DL);    if (FastIS) -    FastIS->setCurDebugLoc(Loc); +    FastIS->setCurDebugLoc(DL);    // If the function doesn't have a default debug location yet, set    // it. This is kind of a hack.    if (MF->getDefaultDebugLoc().isUnknown()) -    MF->setDefaultDebugLoc(Loc); +    MF->setDefaultDebugLoc(DL);  }  /// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.  
static void ResetDebugLoc(SelectionDAGBuilder *SDB, FastISel *FastIS) { -  SDB->setCurDebugLoc(DebugLoc::getUnknownLoc()); +  SDB->setCurDebugLoc(DebugLoc());    if (FastIS) -    FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc()); +    FastIS->setCurDebugLoc(DebugLoc());  }  void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 83acb5df9768..a2df2d09b85c 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -821,21 +821,20 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {  }  void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { -  if (!DL.isUnknown()) { -    DILocation CurDLT = EmissionDetails.MF->getDILocation(DL); - -    if (BeforePrintingInsn) { -      if (CurDLT.getScope().getNode() != 0  -          && PrevDLT.getNode() != CurDLT.getNode()) { -        JITEvent_EmittedFunctionDetails::LineStart NextLine; -        NextLine.Address = getCurrentPCValue(); -        NextLine.Loc = DL; -        EmissionDetails.LineStarts.push_back(NextLine); -      } +  if (DL.isUnknown()) return; +  if (!BeforePrintingInsn) return; -      PrevDLT = CurDLT; -    } +  // FIXME: This is horribly inefficient. +  DILocation CurDLT(DL.getAsMDNode(CurFn->getContext())); +   +  if (CurDLT.getScope().getNode() != 0 && PrevDLT.getNode() !=CurDLT.getNode()){ +    JITEvent_EmittedFunctionDetails::LineStart NextLine; +    NextLine.Address = getCurrentPCValue(); +    NextLine.Loc = DL; +    EmissionDetails.LineStarts.push_back(NextLine);    } + +  PrevDLT = CurDLT;  }  static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0a0b0ea18540..1995f79fa373 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -312,7 +312,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,                                 MachineBasicBlock *FBB,                               const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc argument -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();    int BOpc   = !AFI->isThumbFunction() @@ -653,7 +653,7 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,                                 unsigned DestReg, unsigned SrcReg,                                 const TargetRegisterClass *DestRC,                                 const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    // tGPR is used sometimes in ARM instructions that need to avoid using @@ -715,7 +715,7 @@ void ARMBaseInstrInfo::  storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                      unsigned SrcReg, bool isKill, int FI,                      const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    MachineFunction &MF = *MBB.getParent();    MachineFrameInfo &MFI = *MF.getFrameInfo(); @@ -769,7 +769,7 @@ void ARMBaseInstrInfo::  loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                       unsigned DestReg, int FI,                       const TargetRegisterClass *RC) const { -  DebugLoc DL = 
DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    MachineFunction &MF = *MBB.getParent();    MachineFrameInfo &MFI = *MF.getFrameInfo(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b380c954d606..f1625469085e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1277,8 +1277,7 @@ emitPrologue(MachineFunction &MF) const {    unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();    unsigned NumBytes = MFI->getStackSize();    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); -  DebugLoc dl = (MBBI != MBB.end() ? -                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    // Determine the sizes of each callee-save spill areas and record which frame    // belongs to which callee-save spill areas. diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 1c5bd42d63da..13d8b74014c5 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -399,8 +399,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,      // aligned.      assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");      MachineInstr *CPEMI = -      BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) -                           .addImm(i).addConstantPoolIndex(i).addImm(Size); +      BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) +        .addImm(i).addConstantPoolIndex(i).addImm(Size);      CPEMIs.push_back(CPEMI);      // Add a new CPEntry, but no corresponding CPUser yet. @@ -721,7 +721,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {    // There doesn't seem to be meaningful DebugInfo available; this doesn't    // correspond to anything in the source.    unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; -  BuildMI(OrigBB, DebugLoc::getUnknownLoc(), TII->get(Opc)).addMBB(NewBB); +  BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);    NumSplit++;    // Update the CFG.  All succs of OrigBB are now succs of NewBB. @@ -1103,8 +1103,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,      // targets will be exchanged, and the altered branch may be out of      // range, so the machinery has to know about it.      int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; -    BuildMI(UserMBB, DebugLoc::getUnknownLoc(), -            TII->get(UncondBr)).addMBB(NewMBB); +    BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);      unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);      ImmBranches.push_back(ImmBranch(&UserMBB->back(),                            MaxDisp, false, UncondBr)); @@ -1244,8 +1243,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,    // Now that we have an island to add the CPE to, clone the original CPE and    // add it to the island.    
U.HighWaterMark = NewIsland; -  U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(), -                    TII->get(ARM::CONSTPOOL_ENTRY)) +  U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))                  .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);    CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));    NumCPEs++; @@ -1446,12 +1444,11 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {    // Insert a new conditional branch and a new unconditional branch.    // Also update the ImmBranch as well as adding a new entry for the new branch. -  BuildMI(MBB, DebugLoc::getUnknownLoc(), -          TII->get(MI->getOpcode())) +  BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))      .addMBB(NextBB).addImm(CC).addReg(CCReg);    Br.MI = &MBB->back();    BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); -  BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); +  BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);    BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());    unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);    ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); @@ -1809,7 +1806,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)    // There doesn't seem to be meaningful DebugInfo available; this doesn't    // correspond directly to anything in the source.    assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); -  BuildMI(NewBB, DebugLoc::getUnknownLoc(), TII->get(ARM::t2B)).addMBB(BB); +  BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB);    // Update internal data structures to account for the newly inserted MBB.    MF.RenumberBlocks(NewBB); diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 89c776909078..cf55377bc677 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -18,6 +18,7 @@  #include "llvm/Target/TargetAsmParser.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/SourceMgr.h" +#include "llvm/ADT/OwningPtr.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Twine.h"  using namespace llvm; @@ -46,11 +47,11 @@ private:    bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } -  bool MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack); +  bool MaybeParseRegister(OwningPtr<ARMOperand> &Op, bool ParseWriteBack); -  bool ParseRegisterList(ARMOperand &Op); +  bool ParseRegisterList(OwningPtr<ARMOperand> &Op); -  bool ParseMemory(ARMOperand &Op); +  bool ParseMemory(OwningPtr<ARMOperand> &Op);    bool ParseMemoryOffsetReg(bool &Negative,                              bool &OffsetRegShifted, @@ -58,11 +59,12 @@ private:                              const MCExpr *&ShiftAmount,                              const MCExpr *&Offset,                              bool &OffsetIsReg, -                            int &OffsetRegNum); +                            int &OffsetRegNum, +                            SMLoc &E); -  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount); +  bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); -  bool ParseOperand(ARMOperand &Op); +  bool ParseOperand(OwningPtr<ARMOperand> &Op);    bool ParseDirectiveWord(unsigned Size, SMLoc L); @@ -104,13 +106,17 @@ public:  /// ARMOperand - Instances of this class represent a parsed ARM machine  /// instruction.  
struct ARMOperand : public MCParsedAsmOperand { -  enum { +private: +  ARMOperand() {} +public: +  enum KindTy {      Token,      Register,      Immediate,      Memory    } Kind; +  SMLoc StartLoc, EndLoc;    union {      struct { @@ -126,7 +132,7 @@ struct ARMOperand : public MCParsedAsmOperand {      struct {        const MCExpr *Val;      } Imm; - +          // This is for all forms of ARM address expressions      struct {        unsigned BaseRegNum; @@ -144,6 +150,34 @@ struct ARMOperand : public MCParsedAsmOperand {      } Mem;    }; +   +  ARMOperand(KindTy K, SMLoc S, SMLoc E) +    : Kind(K), StartLoc(S), EndLoc(E) {} +   +  ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { +    Kind = o.Kind; +    StartLoc = o.StartLoc; +    EndLoc = o.EndLoc; +    switch (Kind) { +    case Token: +    Tok = o.Tok; +      break; +    case Register: +      Reg = o.Reg; +      break; +    case Immediate: +      Imm = o.Imm; +      break; +    case Memory: +      Mem = o.Mem; +      break; +    } +  } +   +  /// getStartLoc - Get the location of the first token of this operand. +  SMLoc getStartLoc() const { return StartLoc; } +  /// getEndLoc - Get the location of the last token of this operand. +  SMLoc getEndLoc() const { return EndLoc; }    StringRef getToken() const {      assert(Kind == Token && "Invalid access!"); @@ -169,48 +203,60 @@ struct ARMOperand : public MCParsedAsmOperand {      Inst.addOperand(MCOperand::CreateReg(getReg()));    } -  static ARMOperand CreateToken(StringRef Str) { -    ARMOperand Res; -    Res.Kind = Token; -    Res.Tok.Data = Str.data(); -    Res.Tok.Length = Str.size(); -    return Res; +  static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str, +                          SMLoc S) { +    Op.reset(new ARMOperand); +    Op->Kind = Token; +    Op->Tok.Data = Str.data(); +    Op->Tok.Length = Str.size(); +    Op->StartLoc = S; +    Op->EndLoc = S;    } -  static ARMOperand CreateReg(unsigned RegNum, bool Writeback) { -    ARMOperand Res; -    Res.Kind = Register; -    Res.Reg.RegNum = RegNum; -    Res.Reg.Writeback = Writeback; -    return Res; +  static void CreateReg(OwningPtr<ARMOperand> &Op, unsigned RegNum,  +                        bool Writeback, SMLoc S, SMLoc E) { +    Op.reset(new ARMOperand); +    Op->Kind = Register; +    Op->Reg.RegNum = RegNum; +    Op->Reg.Writeback = Writeback; +     +    Op->StartLoc = S; +    Op->EndLoc = E;    } -  static ARMOperand CreateImm(const MCExpr *Val) { -    ARMOperand Res; -    Res.Kind = Immediate; -    Res.Imm.Val = Val; -    return Res; +  static void CreateImm(OwningPtr<ARMOperand> &Op, const MCExpr *Val, +                        SMLoc S, SMLoc E) { +    Op.reset(new ARMOperand); +    Op->Kind = Immediate; +    Op->Imm.Val = Val; +     +    Op->StartLoc = S; +    Op->EndLoc = E;    } -  static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg, -                              const MCExpr *Offset, unsigned OffsetRegNum, -                              bool OffsetRegShifted, enum ShiftType ShiftType, -                              const MCExpr *ShiftAmount, bool Preindexed, -                              bool Postindexed, bool Negative, bool Writeback) { -    ARMOperand Res; -    Res.Kind = Memory; -    Res.Mem.BaseRegNum = BaseRegNum; -    Res.Mem.OffsetIsReg = OffsetIsReg; -    Res.Mem.Offset = Offset; -    Res.Mem.OffsetRegNum = OffsetRegNum; -    Res.Mem.OffsetRegShifted = OffsetRegShifted; -    Res.Mem.ShiftType = ShiftType; -    Res.Mem.ShiftAmount = ShiftAmount; -    Res.Mem.Preindexed = 
Preindexed; -    Res.Mem.Postindexed = Postindexed; -    Res.Mem.Negative = Negative; -    Res.Mem.Writeback = Writeback; -    return Res; +  static void CreateMem(OwningPtr<ARMOperand> &Op, +                        unsigned BaseRegNum, bool OffsetIsReg, +                        const MCExpr *Offset, unsigned OffsetRegNum, +                        bool OffsetRegShifted, enum ShiftType ShiftType, +                        const MCExpr *ShiftAmount, bool Preindexed, +                        bool Postindexed, bool Negative, bool Writeback, +                        SMLoc S, SMLoc E) { +    Op.reset(new ARMOperand); +    Op->Kind = Memory; +    Op->Mem.BaseRegNum = BaseRegNum; +    Op->Mem.OffsetIsReg = OffsetIsReg; +    Op->Mem.Offset = Offset; +    Op->Mem.OffsetRegNum = OffsetRegNum; +    Op->Mem.OffsetRegShifted = OffsetRegShifted; +    Op->Mem.ShiftType = ShiftType; +    Op->Mem.ShiftAmount = ShiftAmount; +    Op->Mem.Preindexed = Preindexed; +    Op->Mem.Postindexed = Postindexed; +    Op->Mem.Negative = Negative; +    Op->Mem.Writeback = Writeback; +     +    Op->StartLoc = S; +    Op->EndLoc = E;    }  }; @@ -221,7 +267,9 @@ struct ARMOperand : public MCParsedAsmOperand {  /// and false is returned.  Else true is returned and no token is eaten.  /// TODO this is likely to change to allow different register types and or to  /// parse for a specific register type. -bool ARMAsmParser::MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack) { +bool ARMAsmParser::MaybeParseRegister +  (OwningPtr<ARMOperand> &Op, bool ParseWriteBack) { +  SMLoc S, E;    const AsmToken &Tok = Parser.getTok();    assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); @@ -232,27 +280,35 @@ bool ARMAsmParser::MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack) {    RegNum = MatchRegisterName(Tok.getString());    if (RegNum == -1)      return true; +   +  S = Tok.getLoc(); +      Parser.Lex(); // Eat identifier token. +     +  E = Parser.getTok().getLoc();    bool Writeback = false;    if (ParseWriteBack) {      const AsmToken &ExclaimTok = Parser.getTok();      if (ExclaimTok.is(AsmToken::Exclaim)) { +      E = ExclaimTok.getLoc();        Writeback = true;        Parser.Lex(); // Eat exclaim token      }    } -  Op = ARMOperand::CreateReg(RegNum, Writeback); +  ARMOperand::CreateReg(Op, RegNum, Writeback, S, E);    return false;  }  /// Parse a register list, return false if successful else return true or an   /// error.  The first token must be a '{' when called. -bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { +bool ARMAsmParser::ParseRegisterList(OwningPtr<ARMOperand> &Op) { +  SMLoc S, E;    assert(Parser.getTok().is(AsmToken::LCurly) &&           "Token is not an Left Curly Brace"); +  S = Parser.getTok().getLoc();    Parser.Lex(); // Eat left curly brace token.    const AsmToken &RegTok = Parser.getTok(); @@ -290,6 +346,7 @@ bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {    const AsmToken &RCurlyTok = Parser.getTok();    if (RCurlyTok.isNot(AsmToken::RCurly))      return Error(RCurlyTok.getLoc(), "'}' expected"); +  E = RCurlyTok.getLoc();    Parser.Lex(); // Eat left curly brace token.    return false; @@ -299,9 +356,11 @@ bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) {  /// or an error.  The first token must be a '[' when called.  /// TODO Only preindexing and postindexing addressing are started, unindexed  /// with option, etc are still to do. 
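
Each Create* helper now heap-allocates the operand into an OwningPtr out-parameter and records the token range it was parsed from; callers then hand ownership to the operand list with take(). A condensed sketch of the pattern with a cut-down operand type (illustrative, not the full ARMOperand):

#include "llvm/ADT/OwningPtr.h"

namespace {

struct SrcRange { const char *Start, *End; };

struct OperandSketch {
  enum KindTy { Register } Kind;
  unsigned RegNum;
  SrcRange Range;

  // Same shape as ARMOperand::CreateReg above: fill the owning pointer and
  // remember where the operand came from in the source.
  static void CreateReg(llvm::OwningPtr<OperandSketch> &Op, unsigned Reg,
                        SrcRange R) {
    Op.reset(new OperandSketch);
    Op->Kind = Register;
    Op->RegNum = Reg;
    Op->Range = R;
  }
};

} // anonymous namespace

// A caller then releases ownership into the operand list, as
// ParseInstruction does:
//   llvm::OwningPtr<OperandSketch> Op;
//   OperandSketch::CreateReg(Op, 3, R);
//   Operands.push_back(Op.take());
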
-bool ARMAsmParser::ParseMemory(ARMOperand &Op) { +bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) { +  SMLoc S, E;    assert(Parser.getTok().is(AsmToken::LBrac) &&           "Token is not an Left Bracket"); +  S = Parser.getTok().getLoc();    Parser.Lex(); // Eat left bracket token.    const AsmToken &BaseRegTok = Parser.getTok(); @@ -309,7 +368,7 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {      return Error(BaseRegTok.getLoc(), "register expected");    if (MaybeParseRegister(Op, false))      return Error(BaseRegTok.getLoc(), "register expected"); -  int BaseRegNum = Op.getReg(); +  int BaseRegNum = Op->getReg();    bool Preindexed = false;    bool Postindexed = false; @@ -329,21 +388,23 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {      const MCExpr *ShiftAmount;      const MCExpr *Offset;      if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, -                            Offset, OffsetIsReg, OffsetRegNum)) +                            Offset, OffsetIsReg, OffsetRegNum, E))        return true;      const AsmToken &RBracTok = Parser.getTok();      if (RBracTok.isNot(AsmToken::RBrac))        return Error(RBracTok.getLoc(), "']' expected"); +    E = RBracTok.getLoc();      Parser.Lex(); // Eat right bracket token.      const AsmToken &ExclaimTok = Parser.getTok();      if (ExclaimTok.is(AsmToken::Exclaim)) { +      E = ExclaimTok.getLoc();        Writeback = true;        Parser.Lex(); // Eat exclaim token      } -    Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, -                               OffsetRegShifted, ShiftType, ShiftAmount, -                               Preindexed, Postindexed, Negative, Writeback); +    ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, +                          OffsetRegShifted, ShiftType, ShiftAmount, +                          Preindexed, Postindexed, Negative, Writeback, S, E);      return false;    }    // The "[Rn" we have so far was not followed by a comma. @@ -352,6 +413,7 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {      // the "[Rn".      Postindexed = true;      Writeback = true; +    E = Tok.getLoc();      Parser.Lex(); // Eat right bracket token.      int OffsetRegNum = 0; @@ -366,13 +428,14 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {  	return Error(NextTok.getLoc(), "',' expected");        Parser.Lex(); // Eat comma token.        if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, -                              ShiftAmount, Offset, OffsetIsReg, OffsetRegNum)) +                              ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,  +                              E))          return true;      } -    Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, -                               OffsetRegShifted, ShiftType, ShiftAmount, -                               Preindexed, Postindexed, Negative, Writeback); +    ARMOperand::CreateMem(Op, BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, +                          OffsetRegShifted, ShiftType, ShiftAmount, +                          Preindexed, Postindexed, Negative, Writeback, S, E);      return false;    } @@ -387,18 +450,20 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) {  ///   #offset  /// we return false on success or an error otherwise.  
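
For concreteness, these are the kinds of memory operands the two ParseMemory branches and ParseMemoryOffsetReg are aimed at, written in standard ARM assembly syntax; the list is illustrative and not taken from the patch:

static const char *const MemOperandExamples[] = {
  "[r1]",                 // base register only
  "[r1, #8]",             // pre-indexed, immediate offset
  "[r1, r2]",             // pre-indexed, register offset
  "[r1, -r2, lsl #2]!",   // negated, shifted register offset with writeback
  "[r1], #8",             // post-indexed, immediate offset
  "[r1], r2",             // post-indexed, register offset
};
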
bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, -					bool &OffsetRegShifted, +                                        bool &OffsetRegShifted,                                          enum ShiftType &ShiftType,                                          const MCExpr *&ShiftAmount,                                          const MCExpr *&Offset,                                          bool &OffsetIsReg, -                                        int &OffsetRegNum) { -  ARMOperand Op; +                                        int &OffsetRegNum, +                                        SMLoc &E) { +  OwningPtr<ARMOperand> Op;    Negative = false;    OffsetRegShifted = false;    OffsetIsReg = false;    OffsetRegNum = -1;    const AsmToken &NextTok = Parser.getTok(); +  E = NextTok.getLoc();    if (NextTok.is(AsmToken::Plus))      Parser.Lex(); // Eat plus token.    else if (NextTok.is(AsmToken::Minus)) { @@ -409,8 +474,10 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,    const AsmToken &OffsetRegTok = Parser.getTok();    if (OffsetRegTok.is(AsmToken::Identifier)) {      OffsetIsReg = !MaybeParseRegister(Op, false); -    if (OffsetIsReg) -      OffsetRegNum = Op.getReg(); +    if (OffsetIsReg) { +      E = Op->getEndLoc(); +      OffsetRegNum = Op->getReg(); +    }    }    // If we parsed a register as the offset then their can be a shift after that    if (OffsetRegNum != -1) { @@ -420,7 +487,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,        Parser.Lex(); // Eat comma token.        const AsmToken &Tok = Parser.getTok(); -      if (ParseShift(ShiftType, ShiftAmount)) +      if (ParseShift(ShiftType, ShiftAmount, E))  	return Error(Tok.getLoc(), "shift expected");        OffsetRegShifted = true;      } @@ -430,10 +497,12 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,      const AsmToken &HashTok = Parser.getTok();      if (HashTok.isNot(AsmToken::Hash))        return Error(HashTok.getLoc(), "'#' expected"); +          Parser.Lex(); // Eat hash token.      if (getParser().ParseExpression(Offset))       return true; +    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);    }    return false;  } @@ -442,7 +511,9 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,  ///   ( lsl | lsr | asr | ror ) , # shift_amount  ///   rrx  /// and returns true if it parses a shift otherwise it returns false. -bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount) { +bool ARMAsmParser::ParseShift(ShiftType &St,  +                              const MCExpr *&ShiftAmount,  +                              SMLoc &E) {    const AsmToken &Tok = Parser.getTok();    if (Tok.isNot(AsmToken::Identifier))      return true; @@ -550,7 +621,9 @@ MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,  /// Parse a arm instruction operand.  For now this parses the operand regardless  /// of the mnemonic. -bool ARMAsmParser::ParseOperand(ARMOperand &Op) { +bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) { +  SMLoc S, E; +      switch (getLexer().getKind()) {    case AsmToken::Identifier:      if (!MaybeParseRegister(Op, true)) @@ -558,9 +631,11 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) {      // This was not a register so parse other operands that start with an      // identifier (like labels) as expressions and create them as immediates.      
const MCExpr *IdVal; +    S = Parser.getTok().getLoc();      if (getParser().ParseExpression(IdVal))        return true; -    Op = ARMOperand::CreateImm(IdVal); +    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); +    ARMOperand::CreateImm(Op, IdVal, S, E);      return false;    case AsmToken::LBrac:      return ParseMemory(Op); @@ -569,11 +644,13 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) {    case AsmToken::Hash:      // #42 -> immediate.      // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate +    S = Parser.getTok().getLoc();      Parser.Lex();      const MCExpr *ImmVal;      if (getParser().ParseExpression(ImmVal))        return true; -    Op = ARMOperand::CreateImm(ImmVal); +    E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); +    ARMOperand::CreateImm(Op, ImmVal, S, E);      return false;    default:      return Error(Parser.getTok().getLoc(), "unexpected token in operand"); @@ -583,22 +660,25 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) {  /// Parse an arm instruction mnemonic followed by its operands.  bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) { -  Operands.push_back(new ARMOperand(ARMOperand::CreateToken(Name))); +  OwningPtr<ARMOperand> Op; +  ARMOperand::CreateToken(Op, Name, NameLoc); +   +  Operands.push_back(Op.take());    SMLoc Loc = Parser.getTok().getLoc();    if (getLexer().isNot(AsmToken::EndOfStatement)) {      // Read the first operand. -    ARMOperand Op; +    OwningPtr<ARMOperand> Op;      if (ParseOperand(Op)) return true; -    Operands.push_back(new ARMOperand(Op)); +    Operands.push_back(Op.take());      while (getLexer().is(AsmToken::Comma)) {        Parser.Lex();  // Eat the comma.        // Parse and remember the operand.        if (ParseOperand(Op)) return true; -      Operands.push_back(new ARMOperand(Op)); +      Operands.push_back(Op.take());      }    }    return false; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp new file mode 100644 index 000000000000..04313400b8d9 --- /dev/null +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -0,0 +1,532 @@ +//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the ARM Disassembler. +// It contains code to implement the public interfaces of ARMDisassembler and +// ThumbDisassembler, both of which are instances of MCDisassembler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-disassembler" + +#include "ARMDisassembler.h" +#include "ARMDisassemblerCore.h" + +#include "llvm/MC/MCInst.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from +/// ARMDecoderEmitter.cpp TableGen backend.  
It contains: +/// +/// o Mappings from opcode to ARM/Thumb instruction format +/// +/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function +/// for an ARM instruction. +/// +/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding +/// function for a Thumb instruction. +/// +#include "../ARMGenDecoderTables.inc" + +namespace llvm { + +/// showBitVector - Use the raw_ostream to log a diagnostic message describing +/// the inidividual bits of the instruction. +/// +static inline void showBitVector(raw_ostream &os, const uint32_t &insn) { +  // Split the bit position markers into more than one lines to fit 80 columns. +  os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11" +     << " 10  9  8  7  6  5  4  3  2  1  0 \n"; +  os << "---------------------------------------------------------------" +     << "----------------------------------\n"; +  os << '|'; +  for (unsigned i = 32; i != 0; --i) { +    if (insn >> (i - 1) & 0x01) +      os << " 1"; +    else +      os << " 0"; +    os << (i%4 == 1 ? '|' : ':'); +  } +  os << '\n'; +  // Split the bit position markers into more than one lines to fit 80 columns. +  os << "---------------------------------------------------------------" +     << "----------------------------------\n"; +  os << '\n'; +} + +/// decodeARMInstruction is a decorator function which tries special cases of +/// instruction matching before calling the auto-generated decoder function. +static unsigned decodeARMInstruction(uint32_t &insn) { +  if (slice(insn, 31, 28) == 15) +    goto AutoGenedDecoder; + +  // Special case processing, if any, goes here.... + +  // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB. +  // The insufficient encoding information of the combined instruction confuses +  // the decoder wrt BFC/BFI.  Therefore, we try to recover here. +  // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111. +  // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111. +  if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) { +    if (slice(insn, 3, 0) == 15) +      return ARM::BFC; +    else +      return ARM::BFI; +  } + +  // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. +  // As a result, the decoder fails to decode UMULL properly. +  if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { +    return ARM::UMULL; +  } + +  // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195. +  // As a result, the decoder fails to decode SBFX properly. +  if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5) +    return ARM::SBFX; + +  // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198. +  // As a result, the decoder fails to decode UBFX properly. +  if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5) +    return ARM::UBFX; + +  // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. +  // As a result, the decoder fails to deocode SSAT properly. +  if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) +    return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr; + +  // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. +  // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. 
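
The slice() and setSlice() helpers used by all of these special cases live in ARMDisassemblerCore.h, which is not part of this hunk. Judging from uses such as slice(insn, 27, 21) == 0x3e matching "Inst{27-21} = 0b0111110", they read and write an inclusive, right-aligned bit-field; a minimal standalone sketch under that assumption:

#include <cassert>
#include <stdint.h>

// Inclusive bit-field read: returns insn[hi:lo], right-aligned.
static inline uint32_t slice(uint32_t insn, unsigned hi, unsigned lo) {
  unsigned width = hi - lo + 1;
  uint32_t mask = width >= 32 ? ~0u : ((1u << width) - 1);
  return (insn >> lo) & mask;
}

// Inclusive bit-field write: overwrites insn[hi:lo] with val.
static inline void setSlice(uint32_t &insn, unsigned hi, unsigned lo,
                            uint32_t val) {
  unsigned width = hi - lo + 1;
  uint32_t mask = width >= 32 ? ~0u : ((1u << width) - 1);
  insn = (insn & ~(mask << lo)) | ((val & mask) << lo);
}

int main() {
  uint32_t insn = 0x3eu << 21;          // Inst{27-21} = 0b0111110, as in BFC/BFI
  assert(slice(insn, 27, 21) == 0x3e);
  setSlice(insn, 28, 24, 9 << 1 | 0);   // the Thumb2 NEON transform with U = 0
  assert(slice(insn, 28, 24) == 0x12);
  return 0;
}
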
+  if (slice(insn, 27, 24) == 0) { +    switch (slice(insn, 21, 20)) { +    case 2: +      switch (slice(insn, 7, 4)) { +      case 11: +        return ARM::STRHT; +      default: +        break; // fallthrough +      } +      break; +    case 3: +      switch (slice(insn, 7, 4)) { +      case 11: +        return ARM::LDRHT; +      case 13: +        return ARM::LDRSBT; +      case 15: +        return ARM::LDRSHT; +      default: +        break; // fallthrough +      } +      break; +    default: +      break;   // fallthrough +    } +  } + +  // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153. +  // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST +  // properly. +  if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) { +    unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); +    switch (slice(insn, 7, 4)) { +    case 11: +      switch (PW) { +      case 2: // Offset +        return ARM::STRH; +      case 3: // Pre-indexed +        return ARM::STRH_PRE; +      case 0: // Post-indexed +        return ARM::STRH_POST; +      default: +        break; // fallthrough +      } +      break; +    case 13: +      switch (PW) { +      case 2: // Offset +        return ARM::LDRD; +      case 3: // Pre-indexed +        return ARM::LDRD_PRE; +      case 0: // Post-indexed +        return ARM::LDRD_POST; +      default: +        break; // fallthrough +      } +      break; +    case 15: +      switch (PW) { +      case 2: // Offset +        return ARM::STRD; +      case 3: // Pre-indexed +        return ARM::STRD_PRE; +      case 0: // Post-indexed +        return ARM::STRD_POST; +      default: +        break; // fallthrough +      } +      break; +    default: +      break; // fallthrough +    } +  } + +  // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153. +  // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST +  // properly. +  if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) { +    unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); +    switch (slice(insn, 7, 4)) { +    case 11: +      switch (PW) { +      case 2: // Offset +        return ARM::LDRH; +      case 3: // Pre-indexed +        return ARM::LDRH_PRE; +      case 0: // Post-indexed +        return ARM::LDRH_POST; +      default: +        break; // fallthrough +      } +      break; +    case 13: +      switch (PW) { +      case 2: // Offset +        return ARM::LDRSB; +      case 3: // Pre-indexed +        return ARM::LDRSB_PRE; +      case 0: // Post-indexed +        return ARM::LDRSB_POST; +      default: +        break; // fallthrough +      } +      break; +    case 15: +      switch (PW) { +      case 2: // Offset +        return ARM::LDRSH; +      case 3: // Pre-indexed +        return ARM::LDRSH_PRE; +      case 0: // Post-indexed +        return ARM::LDRSH_POST; +      default: +        break; // fallthrough +      } +      break; +    default: +      break; // fallthrough +    } +  } + +AutoGenedDecoder: +  // Calling the auto-generated decoder function. +  return decodeInstruction(insn); +} + +// Helper function for special case handling of LDR (literal) and friends. +// See, for example, A6.3.7 Load word: Table A6-18 Load word. +// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode +// before returning it. +static unsigned T2Morph2LoadLiteral(unsigned Opcode) { +  switch (Opcode) { +  default: +    return Opcode; // Return unmorphed opcode. 
+ +  case ARM::t2LDRDi8: +    return ARM::t2LDRDpci; + +  case ARM::t2LDR_POST:   case ARM::t2LDR_PRE: +  case ARM::t2LDRi12:     case ARM::t2LDRi8: +  case ARM::t2LDRs: +    return ARM::t2LDRpci; + +  case ARM::t2LDRB_POST:  case ARM::t2LDRB_PRE: +  case ARM::t2LDRBi12:    case ARM::t2LDRBi8: +  case ARM::t2LDRBs: +    return ARM::t2LDRBpci; + +  case ARM::t2LDRH_POST:  case ARM::t2LDRH_PRE: +  case ARM::t2LDRHi12:    case ARM::t2LDRHi8: +  case ARM::t2LDRHs: +    return ARM::t2LDRHpci; + +  case ARM::t2LDRSB_POST:  case ARM::t2LDRSB_PRE: +  case ARM::t2LDRSBi12:    case ARM::t2LDRSBi8: +  case ARM::t2LDRSBs: +    return ARM::t2LDRSBpci; + +  case ARM::t2LDRSH_POST:  case ARM::t2LDRSH_PRE: +  case ARM::t2LDRSHi12:    case ARM::t2LDRSHi8: +  case ARM::t2LDRSHs: +    return ARM::t2LDRSHpci; +  } +} + +/// decodeThumbSideEffect is a decorator function which can potentially twiddle +/// the instruction or morph the returned opcode under Thumb2. +/// +/// First it checks whether the insn is a NEON or VFP instr; if true, bit +/// twiddling could be performed on insn to turn it into an ARM NEON/VFP +/// equivalent instruction and decodeInstruction is called with the transformed +/// insn. +/// +/// Next, there is special handling for Load byte/halfword/word instruction by +/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded +/// Thumb2 instruction.  See comments below for further details. +/// +/// Finally, one last check is made to see whether the insn is a NEON/VFP and +/// decodeInstruction(insn) is invoked on the original insn. +/// +/// Otherwise, decodeThumbInstruction is called with the original insn. +static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) { +  if (IsThumb2) { +    uint16_t op1 = slice(insn, 28, 27); +    uint16_t op2 = slice(insn, 26, 20); + +    // A6.3 32-bit Thumb instruction encoding +    // Table A6-9 32-bit Thumb instruction encoding + +    // The coprocessor instructions of interest are transformed to their ARM +    // equivalents. + +    // --------- Transform Begin Marker --------- +    if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) { +      // A7.4 Advanced SIMD data-processing instructions +      // U bit of Thumb corresponds to Inst{24} of ARM. +      uint16_t U = slice(op1, 1, 1); + +      // Inst{28-24} of ARM = {1,0,0,1,U}; +      uint16_t bits28_24 = 9 << 1 | U; +      DEBUG(showBitVector(errs(), insn)); +      setSlice(insn, 28, 24, bits28_24); +      return decodeInstruction(insn); +    } + +    if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) { +      // A7.7 Advanced SIMD element or structure load/store instructions +      // Inst{27-24} of Thumb = 0b1001 +      // Inst{27-24} of ARM   = 0b0100 +      DEBUG(showBitVector(errs(), insn)); +      setSlice(insn, 27, 24, 4); +      return decodeInstruction(insn); +    } +    // --------- Transform End Marker --------- + +    // See, for example, A6.3.7 Load word: Table A6-18 Load word. +    // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode +    // before returning it to our caller. +    if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 +        && slice(insn, 19, 16) == 15) +      return T2Morph2LoadLiteral(decodeThumbInstruction(insn)); + +    // One last check for NEON/VFP instructions. +    if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) +      return decodeInstruction(insn); + +    // Fall through. 
+  } + +  return decodeThumbInstruction(insn); +} + +static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) { +  switch (Opcode) { +  default: +    return false; +  case ARM::t2PLDi12:   case ARM::t2PLDi8: +  case ARM::t2PLDr:     case ARM::t2PLDs: +  case ARM::t2PLDWi12:  case ARM::t2PLDWi8: +  case ARM::t2PLDWr:    case ARM::t2PLDWs: +  case ARM::t2PLIi12:   case ARM::t2PLIi8: +  case ARM::t2PLIr:     case ARM::t2PLIs: +    return true; +  } +} + +static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) { +  switch (Opcode) { +  default: +    return 0; +  case ARM::t2PLDi12:   case ARM::t2PLDi8: +  case ARM::t2PLDr:     case ARM::t2PLDs: +    return ARM::t2PLDpci; +  case ARM::t2PLDWi12:  case ARM::t2PLDWi8: +  case ARM::t2PLDWr:    case ARM::t2PLDWs: +    return ARM::t2PLDWpci; +  case ARM::t2PLIi12:   case ARM::t2PLIi8: +  case ARM::t2PLIr:     case ARM::t2PLIs: +    return ARM::t2PLIpci; +  } +} + +// +// Public interface for the disassembler +// + +bool ARMDisassembler::getInstruction(MCInst &MI, +                                     uint64_t &Size, +                                     const MemoryObject &Region, +                                     uint64_t Address, +                                     raw_ostream &os) const { +  // The machine instruction. +  uint32_t insn; + +  // We want to read exactly 4 bytes of data. +  if (Region.readBytes(Address, 4, (uint8_t*)&insn, NULL) == -1) +    return false; + +  unsigned Opcode = decodeARMInstruction(insn); +  ARMFormat Format = ARMFormats[Opcode]; +  Size = 4; + +  DEBUG({ +      errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) +             << " Format=" << stringForARMFormat(Format) << '(' << (int)Format +             << ")\n"; +      showBitVector(errs(), insn); +    }); + +  ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); + +  if (!Builder) +    return false; + +  if (!Builder->Build(MI, insn)) +    return false; + +  delete Builder; + +  return true; +} + +bool ThumbDisassembler::getInstruction(MCInst &MI, +                                       uint64_t &Size, +                                       const MemoryObject &Region, +                                       uint64_t Address, +                                       raw_ostream &os) const { +  // The machine instruction. +  uint32_t insn = 0; +  uint32_t insn1 = 0; + +  // A6.1 Thumb instruction set encoding +  // +  // If bits [15:11] of the halfword being decoded take any of the following +  // values, the halfword is the first halfword of a 32-bit instruction: +  // o 0b11101 +  // o 0b11110 +  // o 0b11111. +  // +  // Otherwise, the halfword is a 16-bit instruction. + +  // Read 2 bytes of data first. +  if (Region.readBytes(Address, 2, (uint8_t*)&insn, NULL) == -1) +    return false; + +  unsigned bits15_11 = slice(insn, 15, 11); +  bool IsThumb2 = false; + +  // 32-bit instructions if the bits [15:11] of the halfword matches +  // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }. +  if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) { +    IsThumb2 = true; +    if (Region.readBytes(Address + 2, 2, (uint8_t*)&insn1, NULL) == -1) +      return false; +    insn = (insn << 16 | insn1); +  } + +  // The insn could potentially be bit-twiddled in order to be decoded as an ARM +  // NEON/VFP opcode.  In such case, the modified insn is later disassembled as +  // an ARM NEON/VFP instruction. 
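The bit-twiddling mentioned in the comment above is what decodeThumbSideEffect does with the setSlice helper used earlier in the patch: for Advanced SIMD data-processing encodings it overwrites Inst{28-24} with {1,0,0,1,U} so that the ARM decoder tables apply. A minimal standalone sketch of that rewrite, not part of the patch (setBits and the sample word are illustrative):

// Standalone sketch of the setSlice(insn, hi, lo, val) rewrite used by
// decodeThumbSideEffect.  setBits is an illustrative stand-in.
#include <cstdint>
#include <cstdio>

static void setBits(uint32_t &insn, unsigned hi, unsigned lo, uint32_t val) {
  uint32_t mask = ((1u << (hi - lo + 1)) - 1u) << lo;
  insn = (insn & ~mask) | ((val << lo) & mask);
}

int main() {
  uint32_t insn = 0xFF000000;               // arbitrary 32-bit Thumb2 word
  uint32_t U = (insn >> 28) & 1;            // Thumb U bit maps to ARM Inst{24}
  setBits(insn, 28, 24, (9u << 1) | U);     // Inst{28-24} := {1,0,0,1,U}
  std::printf("0x%08X\n", (unsigned)insn);  // now decodable by the ARM tables
  return 0;
}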
+  // +  // This is a short term solution for lack of encoding bits specified for the +  // Thumb2 NEON/VFP instructions.  The long term solution could be adding some +  // infrastructure to have each instruction support more than one encodings. +  // Which encoding is used would be based on which subtarget the compiler/ +  // disassembler is working with at the time.  This would allow the sharing of +  // the NEON patterns between ARM and Thumb2, as well as potential greater +  // sharing between the regular ARM instructions and the 32-bit wide Thumb2 +  // instructions as well. +  unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn); + +  // A8.6.117/119/120/121. +  // PLD/PLDW/PLI instructions with Rn==15 is transformed to the pci variant. +  if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15) +    Opcode = T2Morph2Preload2PCI(Opcode); + +  ARMFormat Format = ARMFormats[Opcode]; +  Size = IsThumb2 ? 4 : 2; + +  DEBUG({ +      errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) +             << " Format=" << stringForARMFormat(Format) << '(' << (int)Format +             << ")\n"; +      showBitVector(errs(), insn); +    }); + +  ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); +  Builder->setSession(const_cast<Session *>(&SO)); + +  if (!Builder) +    return false; + +  if (!Builder->Build(MI, insn)) +    return false; + +  delete Builder; + +  return true; +} + +// A8.6.50 +static unsigned short CountITSize(unsigned ITMask) { +  // First count the trailing zeros of the IT mask. +  unsigned TZ = CountTrailingZeros_32(ITMask); +  assert(TZ <= 3 && "Encoding error"); +  return (4 - TZ); +} + +/// Init ITState. +void Session::InitIT(unsigned short bits7_0) { +  ITCounter = CountITSize(slice(bits7_0, 3, 0)); +  ITState = bits7_0; +} + +/// Update ITState if necessary. +void Session::UpdateIT() { +  assert(ITCounter); +  --ITCounter; +  if (ITCounter == 0) +    ITState = 0; +  else { +    unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1; +    setSlice(ITState, 4, 0, NewITState4_0); +  } +} + +static MCDisassembler *createARMDisassembler(const Target &T) { +  return new ARMDisassembler; +} + +static MCDisassembler *createThumbDisassembler(const Target &T) { +  return new ThumbDisassembler; +} + +extern "C" void LLVMInitializeARMDisassembler() {  +  // Register the disassembler. +  TargetRegistry::RegisterMCDisassembler(TheARMTarget,  +                                         createARMDisassembler); +  TargetRegistry::RegisterMCDisassembler(TheThumbTarget, +                                         createThumbDisassembler); +} + +} // namespace llvm diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.h b/lib/Target/ARM/Disassembler/ARMDisassembler.h new file mode 100644 index 000000000000..44592e0f1567 --- /dev/null +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.h @@ -0,0 +1,91 @@ +//===- ARMDisassembler.h - Disassembler for ARM/Thumb ISA -------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the ARM Disassembler. +// It contains the header for ARMDisassembler and ThumbDisassembler, both are +// subclasses of MCDisassembler. 
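As an aside on the Session class used above: it tracks IT-block progress with a counter derived from the trailing zeros of the 4-bit mask (A8.6.50) and a state byte whose low five bits shift left by one per instruction (A2.5.2). A rough standalone sketch of that state machine, with a portable count-trailing-zeros standing in for CountTrailingZeros_32 (names are illustrative):

#include <cassert>
#include <cstdio>

// Portable stand-in for CountTrailingZeros_32 on a non-zero 4-bit mask.
static unsigned ctz4(unsigned mask) {
  unsigned n = 0;
  while ((mask & 1) == 0) { mask >>= 1; ++n; }
  return n;
}

struct ITBlock {
  unsigned Counter = 0;  // instructions left in the block: 0..4
  unsigned State = 0;    // IT[7:5] = base condition, IT[4:0] = mask

  void init(unsigned bits7_0) {
    // A8.6.50: the position of the least significant set bit of the 4-bit
    // mask encodes the block length (mask 0b1000 -> 1 insn ... 0b0001 -> 4).
    assert((bits7_0 & 0xF) != 0 && "IT mask cannot be zero");
    Counter = 4 - ctz4(bits7_0 & 0xF);
    State = bits7_0;
  }

  void advance() {          // call once per instruction inside the block
    assert(Counter && "not inside an IT block");
    if (--Counter == 0)
      State = 0;
    else
      State = (State & ~0x1Fu) | ((State << 1) & 0x1Fu);  // shift IT[4:0] left
  }
};

int main() {
  ITBlock it;
  it.init(0x18);                        // firstcond=0b0001, mask=0b1000: 1 insn
  std::printf("len=%u\n", it.Counter);  // prints len=1
  it.advance();
  return 0;
}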
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMDISASSEMBLER_H +#define ARMDISASSEMBLER_H + +#include "llvm/MC/MCDisassembler.h" + +namespace llvm { +   +class MCInst; +class MemoryObject; +class raw_ostream; +   +/// ARMDisassembler - ARM disassembler for all ARM platforms. +class ARMDisassembler : public MCDisassembler { +public: +  /// Constructor     - Initializes the disassembler. +  /// +  ARMDisassembler() : +    MCDisassembler() { +  } + +  ~ARMDisassembler() { +  } + +  /// getInstruction - See MCDisassembler. +  bool getInstruction(MCInst &instr, +                      uint64_t &size, +                      const MemoryObject ®ion, +                      uint64_t address, +                      raw_ostream &vStream) const; +private: +}; + +// Forward declaration. +class ARMBasicMCBuilder; + +/// Session - Keep track of the IT Block progression. +class Session { +  friend class ARMBasicMCBuilder; +public: +  Session() : ITCounter(0), ITState(0) {} +  ~Session() {} +  /// InitIT - Initializes ITCounter/ITState. +  void InitIT(unsigned short bits7_0); +  /// UpdateIT - Updates ITCounter/ITState as IT Block progresses. +  void UpdateIT(); + +private: +  unsigned ITCounter; // Possible values: 0, 1, 2, 3, 4. +  unsigned ITState;   // A2.5.2 Consists of IT[7:5] and IT[4:0] initially. +}; + +/// ThumbDisassembler - Thumb disassembler for all ARM platforms. +class ThumbDisassembler : public MCDisassembler { +public: +  /// Constructor     - Initializes the disassembler. +  /// +  ThumbDisassembler() : +    MCDisassembler(), SO() { +  } + +  ~ThumbDisassembler() { +  } + +  /// getInstruction - See MCDisassembler. +  bool getInstruction(MCInst &instr, +                      uint64_t &size, +                      const MemoryObject ®ion, +                      uint64_t address, +                      raw_ostream &vStream) const; +private: +  Session SO; +}; + +} // namespace llvm +   +#endif diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp new file mode 100644 index 000000000000..db921ef0b628 --- /dev/null +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -0,0 +1,3252 @@ +//===- ARMDisassemblerCore.cpp - ARM disassembler helpers -------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the ARM Disassembler. +// It contains code to represent the core concepts of Builder and DisassembleFP +// to solve the problem of disassembling an ARM instr. +// +//===----------------------------------------------------------------------===// + +#include "ARMDisassemblerCore.h" +#include "ARMAddressingModes.h" + +/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const +/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s +/// describing the operand info for each ARMInsts[i]. +/// +/// Together with an instruction's encoding format, we can take advantage of the +/// NumOperands and the OpInfo fields of the target instruction description in +/// the quest to build out the MCOperand list for an MCInst. +/// +/// The general guideline is that with a known format, the number of dst and src +/// operands are well-known.  The dst is built first, followed by the src +/// operand(s).  
The operands not yet used at this point are for the Implicit +/// Uses and Defs by this instr.  For the Uses part, the pred:$p operand is +/// defined with two components: +/// +/// def pred {	// Operand PredicateOperand +///   ValueType Type = OtherVT; +///   string PrintMethod = "printPredicateOperand"; +///   string AsmOperandLowerMethod = ?; +///   dag MIOperandInfo = (ops i32imm, CCR); +///   AsmOperandClass ParserMatchClass = ImmAsmOperand; +///   dag DefaultOps = (ops (i32 14), (i32 zero_reg)); +/// } +/// +/// which is manifested by the TargetOperandInfo[] of: +/// +/// { 0, 0|(1<<TOI::Predicate), 0 }, +/// { ARM::CCRRegClassID, 0|(1<<TOI::Predicate), 0 } +/// +/// So the first predicate MCOperand corresponds to the immediate part of the +/// ARM condition field (Inst{31-28}), and the second predicate MCOperand +/// corresponds to a register kind of ARM::CPSR. +/// +/// For the Defs part, in the simple case of only cc_out:$s, we have: +/// +/// def cc_out {	// Operand OptionalDefOperand +///   ValueType Type = OtherVT; +///   string PrintMethod = "printSBitModifierOperand"; +///   string AsmOperandLowerMethod = ?; +///   dag MIOperandInfo = (ops CCR); +///   AsmOperandClass ParserMatchClass = ImmAsmOperand; +///   dag DefaultOps = (ops (i32 zero_reg)); +/// } +/// +/// which is manifested by the one TargetOperandInfo of: +/// +/// { ARM::CCRRegClassID, 0|(1<<TOI::OptionalDef), 0 } +/// +/// And this maps to one MCOperand with the regsiter kind of ARM::CPSR. +#include "ARMGenInstrInfo.inc" + +using namespace llvm; + +const char *ARMUtils::OpcodeName(unsigned Opcode) { +  return ARMInsts[Opcode].Name; +} + +// Return the register enum Based on RegClass and the raw register number. +// For DRegPair, see comments below. +// FIXME: Auto-gened? +static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, +                                bool DRegPair = false) { + +  if (DRegPair && RegClassID == ARM::QPRRegClassID) { +    // LLVM expects { Dd, Dd+1 } to form a super register; this is not specified +    // in the ARM Architecture Manual as far as I understand it (A8.6.307). +    // Therefore, we morph the RegClassID to be the sub register class and don't +    // subsequently transform the RawRegister encoding when calculating RegNum. +    // +    // See also ARMinstPrinter::printOperand() wrt "dregpair" modifier part +    // where this workaround is meant for. +    RegClassID = ARM::DPRRegClassID; +  } + +  // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). +  unsigned RegNum = +    RegClassID == ARM::QPRRegClassID ? 
RawRegister >> 1 : RawRegister; + +  switch (RegNum) { +  default: +    break; +  case 0: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R0; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D0; +    case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: +    case ARM::QPR_VFP2RegClassID: +      return ARM::Q0; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S0; +    } +    break; +  case 1: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R1; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D1; +    case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: +    case ARM::QPR_VFP2RegClassID: +      return ARM::Q1; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S1; +    } +    break; +  case 2: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R2; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D2; +    case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: +    case ARM::QPR_VFP2RegClassID: +      return ARM::Q2; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S2; +    } +    break; +  case 3: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R3; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D3; +    case ARM::QPRRegClassID: case ARM::QPR_8RegClassID: +    case ARM::QPR_VFP2RegClassID: +      return ARM::Q3; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S3; +    } +    break; +  case 4: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R4; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D4; +    case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q4; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S4; +    } +    break; +  case 5: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R5; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D5; +    case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q5; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S5; +    } +    break; +  case 6: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R6; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D6; +    case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q6; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S6; +    } +    break; +  case 7: +    switch (RegClassID) { +    case ARM::GPRRegClassID: case ARM::tGPRRegClassID: return ARM::R7; +    case ARM::DPRRegClassID: case ARM::DPR_8RegClassID: +    case ARM::DPR_VFP2RegClassID: +      return ARM::D7; +    case ARM::QPRRegClassID: case ARM::QPR_VFP2RegClassID: return ARM::Q7; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S7; +    } +    break; +  case 8: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::R8; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D8; +    case 
ARM::QPRRegClassID: return ARM::Q8; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S8; +    } +    break; +  case 9: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::R9; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D9; +    case ARM::QPRRegClassID: return ARM::Q9; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S9; +    } +    break; +  case 10: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::R10; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D10; +    case ARM::QPRRegClassID: return ARM::Q10; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S10; +    } +    break; +  case 11: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::R11; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D11; +    case ARM::QPRRegClassID: return ARM::Q11; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S11; +    } +    break; +  case 12: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::R12; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D12; +    case ARM::QPRRegClassID: return ARM::Q12; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S12; +    } +    break; +  case 13: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::SP; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D13; +    case ARM::QPRRegClassID: return ARM::Q13; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S13; +    } +    break; +  case 14: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::LR; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D14; +    case ARM::QPRRegClassID: return ARM::Q14; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S14; +    } +    break; +  case 15: +    switch (RegClassID) { +    case ARM::GPRRegClassID: return ARM::PC; +    case ARM::DPRRegClassID: case ARM::DPR_VFP2RegClassID: return ARM::D15; +    case ARM::QPRRegClassID: return ARM::Q15; +    case ARM::SPRRegClassID: case ARM::SPR_8RegClassID: return ARM::S15; +    } +    break; +  case 16: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D16; +    case ARM::SPRRegClassID: return ARM::S16; +    } +    break; +  case 17: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D17; +    case ARM::SPRRegClassID: return ARM::S17; +    } +    break; +  case 18: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D18; +    case ARM::SPRRegClassID: return ARM::S18; +    } +    break; +  case 19: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D19; +    case ARM::SPRRegClassID: return ARM::S19; +    } +    break; +  case 20: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D20; +    case ARM::SPRRegClassID: return ARM::S20; +    } +    break; +  case 21: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D21; +    case ARM::SPRRegClassID: return ARM::S21; +    } +    break; +  case 22: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D22; +    case ARM::SPRRegClassID: return ARM::S22; +    } +    break; +  case 23: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D23; +    case ARM::SPRRegClassID: return ARM::S23; +    } +    break; +  case 24: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D24; +    case 
ARM::SPRRegClassID: return ARM::S24; +    } +    break; +  case 25: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D25; +    case ARM::SPRRegClassID: return ARM::S25; +    } +    break; +  case 26: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D26; +    case ARM::SPRRegClassID: return ARM::S26; +    } +    break; +  case 27: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D27; +    case ARM::SPRRegClassID: return ARM::S27; +    } +    break; +  case 28: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D28; +    case ARM::SPRRegClassID: return ARM::S28; +    } +    break; +  case 29: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D29; +    case ARM::SPRRegClassID: return ARM::S29; +    } +    break; +  case 30: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D30; +    case ARM::SPRRegClassID: return ARM::S30; +    } +    break; +  case 31: +    switch (RegClassID) { +    case ARM::DPRRegClassID: return ARM::D31; +    case ARM::SPRRegClassID: return ARM::S31; +    } +    break; +  } +  assert(0 && "Invalid (RegClassID, RawRegister) combination"); +  return 0; +} + +/////////////////////////////// +//                           // +//     Utility Functions     // +//                           // +/////////////////////////////// + +// Extract/Decode Rd: Inst{15-12}. +static inline unsigned decodeRd(uint32_t insn) { +  return (insn >> ARMII::RegRdShift) & ARMII::GPRRegMask; +} + +// Extract/Decode Rn: Inst{19-16}. +static inline unsigned decodeRn(uint32_t insn) { +  return (insn >> ARMII::RegRnShift) & ARMII::GPRRegMask; +} + +// Extract/Decode Rm: Inst{3-0}. +static inline unsigned decodeRm(uint32_t insn) { +  return (insn & ARMII::GPRRegMask); +} + +// Extract/Decode Rs: Inst{11-8}. +static inline unsigned decodeRs(uint32_t insn) { +  return (insn >> ARMII::RegRsShift) & ARMII::GPRRegMask; +} + +static inline unsigned getCondField(uint32_t insn) { +  return (insn >> ARMII::CondShift); +} + +static inline unsigned getIBit(uint32_t insn) { +  return (insn >> ARMII::I_BitShift) & 1; +} + +static inline unsigned getAM3IBit(uint32_t insn) { +  return (insn >> ARMII::AM3_I_BitShift) & 1; +} + +static inline unsigned getPBit(uint32_t insn) { +  return (insn >> ARMII::P_BitShift) & 1; +} + +static inline unsigned getUBit(uint32_t insn) { +  return (insn >> ARMII::U_BitShift) & 1; +} + +static inline unsigned getPUBits(uint32_t insn) { +  return (insn >> ARMII::U_BitShift) & 3; +} + +static inline unsigned getSBit(uint32_t insn) { +  return (insn >> ARMII::S_BitShift) & 1; +} + +static inline unsigned getWBit(uint32_t insn) { +  return (insn >> ARMII::W_BitShift) & 1; +} + +static inline unsigned getDBit(uint32_t insn) { +  return (insn >> ARMII::D_BitShift) & 1; +} + +static inline unsigned getNBit(uint32_t insn) { +  return (insn >> ARMII::N_BitShift) & 1; +} + +static inline unsigned getMBit(uint32_t insn) { +  return (insn >> ARMII::M_BitShift) & 1; +} + +// See A8.4 Shifts applied to a register. +//     A8.4.2 Register controlled shifts. +// +// getShiftOpcForBits - getShiftOpcForBits translates from the ARM encoding bits +// into llvm enums for shift opcode.  The API clients should pass in the value +// encoded with two bits, so the assert stays to signal a wrong API usage. 
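The decodeRd/decodeRn/decodeRm/decodeRs helpers above are just fixed shifts plus a 4-bit mask, per the field positions named in their comments. A standalone sketch with the shift amounts written out (the ARMII constants are not restated here, and the sample ADD encoding is illustrative):

#include <cstdint>
#include <cstdio>

// Field positions restated from the comments above: Rd=Inst{15-12},
// Rn=Inst{19-16}, Rm=Inst{3-0}, Rs=Inst{11-8}.
static unsigned fieldRd(uint32_t insn) { return (insn >> 12) & 0xF; }
static unsigned fieldRn(uint32_t insn) { return (insn >> 16) & 0xF; }
static unsigned fieldRm(uint32_t insn) { return insn & 0xF; }
static unsigned fieldRs(uint32_t insn) { return (insn >> 8) & 0xF; }

int main() {
  // "ADD r2, r1, r3" built field by field: cond=0b1110, opcode ADD=0b0100 in
  // Inst{24-21}, Rn=1, Rd=2, Rm=3 (register form, no shift).  Illustrative.
  uint32_t add = (0xEu << 28) | (0x4u << 21) | (1u << 16) | (2u << 12) | 3u;
  std::printf("Rd=%u Rn=%u Rm=%u Rs=%u\n",
              fieldRd(add), fieldRn(add), fieldRm(add), fieldRs(add));
  return 0;
}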
+// +// A8-12: DecodeRegShift() +static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) { +  switch (bits) { +  default: assert(0 && "No such value"); return ARM_AM::no_shift; +  case 0:  return ARM_AM::lsl; +  case 1:  return ARM_AM::lsr; +  case 2:  return ARM_AM::asr; +  case 3:  return ARM_AM::ror; +  } +} + +// See A8.4 Shifts applied to a register. +//     A8.4.1 Constant shifts. +// +// getImmShiftSE - getImmShiftSE translates from the raw ShiftOpc and raw Imm5 +// encodings into the intended ShiftOpc and shift amount. +// +// A8-11: DecodeImmShift() +static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) { +  // If type == 0b11 and imm5 == 0, we have an rrx, instead. +  if (ShOp == ARM_AM::ror && ShImm == 0) +    ShOp = ARM_AM::rrx; +  // If (lsr or asr) and imm5 == 0, shift amount is 32. +  if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0) +    ShImm = 32; +} + +// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding +// bits Inst{24-23} (P(24) and U(23)) into llvm enums for AMSubMode.  The API +// clients should pass in the value encoded with two bits, so the assert stays +// to signal a wrong API usage. +static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) { +  switch (bits) { +  default: assert(0 && "No such value"); return ARM_AM::bad_am_submode; +  case 1:  return ARM_AM::ia;   // P=0 U=1 +  case 3:  return ARM_AM::ib;   // P=1 U=1 +  case 0:  return ARM_AM::da;   // P=0 U=0 +  case 2:  return ARM_AM::db;   // P=1 U=0 +  } +} + +//////////////////////////////////////////// +//                                        // +//    Disassemble function definitions    // +//                                        // +//////////////////////////////////////////// + +/// There is a separate Disassemble*Frm function entry for disassembly of an ARM +/// instr into a list of MCOperands in the appropriate order, with possible dst, +/// followed by possible src(s). +/// +/// The processing of the predicate, and the 'S' modifier bit, if MI modifies +/// the CPSR, is factored into ARMBasicMCBuilder's method named +/// TryPredicateAndSBitModifier. + +static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7) +    return true; + +  assert(0 && "Unexpected pseudo instruction!"); +  return false; +} + +// Multiply Instructions. 
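As an aside before the multiply handling below: getShiftOpcForBits and getImmShiftSE above encode the A8-11 DecodeImmShift rules, where ror with imm5 == 0 really means rrx, and lsr/asr with imm5 == 0 mean a shift of 32. A standalone sketch with a local enum standing in for ARM_AM::ShiftOpc (illustrative only):

#include <cstdio>

enum ShiftKind { LSL, LSR, ASR, ROR, RRX };   // local stand-in for ARM_AM

// A8-11 DecodeImmShift: map the raw (type, imm5) pair to the real shift.
static void decodeImmShift(unsigned type, unsigned imm5,
                           ShiftKind &kind, unsigned &amount) {
  kind = static_cast<ShiftKind>(type);        // 0..3 = lsl/lsr/asr/ror
  amount = imm5;
  if (kind == ROR && imm5 == 0)
    kind = RRX;                               // ror #0 encodes rrx
  else if ((kind == LSR || kind == ASR) && imm5 == 0)
    amount = 32;                              // lsr/asr #0 encodes #32
}

int main() {
  ShiftKind k; unsigned amt;
  decodeImmShift(2, 0, k, amt);               // asr with imm5 == 0
  std::printf("kind=%d amount=%u\n", k, amt); // prints kind=2 (ASR) amount=32
  return 0;
}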
+// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLS: +//     Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12} +// +// MUL, SMMUL, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT: +//     Rd{19-16} Rn{3-0} Rm{11-8} +// +// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT: +//     RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8} +// +// The mapping of the multiply registers to the "regular" ARM registers, where +// there are convenience decoder functions, is: +// +// Inst{15-12} => Rd +// Inst{19-16} => Rn +// Inst{3-0} => Rm +// Inst{11-8} => Rs +static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  unsigned short NumDefs = TID.getNumDefs(); +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumDefs > 0 && "NumDefs should be greater than 0 for MulFrm"); +  assert(NumOps >= 3 +         && OpInfo[0].RegClass == ARM::GPRRegClassID +         && OpInfo[1].RegClass == ARM::GPRRegClassID +         && OpInfo[2].RegClass == ARM::GPRRegClassID +         && "Expect three register operands"); + +  // Instructions with two destination registers have RdLo{15-12} first. +  if (NumDefs == 2) { +    assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && +           "Expect 4th register operand"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)))); +    ++OpIdx; +  } + +  // The destination register: RdHi{19-16} or Rd{19-16}. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); + +  // The two src regsiters: Rn{3-0}, then Rm{11-8}. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRm(insn)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRs(insn)))); +  OpIdx += 3; + +  // Many multiply instructions (e.g., MLA) have three src registers. +  // The third register operand is Ra{15-12}. +  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)))); +    ++OpIdx; +  } + +  return true; +} + +// Helper routines for disassembly of coprocessor instructions. + +static bool LdStCopOpcode(unsigned Opcode) { +  if ((Opcode >= ARM::LDC2L_OFFSET && Opcode <= ARM::LDC_PRE) || +      (Opcode >= ARM::STC2L_OFFSET && Opcode <= ARM::STC_PRE)) +    return true; +  return false; +} +static bool CoprocessorOpcode(unsigned Opcode) { +  if (LdStCopOpcode(Opcode)) +    return true; + +  switch (Opcode) { +  default: +    return false; +  case ARM::CDP:  case ARM::CDP2: +  case ARM::MCR:  case ARM::MCR2:  case ARM::MRC:  case ARM::MRC2: +  case ARM::MCRR: case ARM::MCRR2: case ARM::MRRC: case ARM::MRRC2: +    return true; +  } +} +static inline unsigned GetCoprocessor(uint32_t insn) { +  return slice(insn, 11, 8); +} +static inline unsigned GetCopOpc1(uint32_t insn, bool CDP) { +  return CDP ? 
slice(insn, 23, 20) : slice(insn, 23, 21); +} +static inline unsigned GetCopOpc2(uint32_t insn) { +  return slice(insn, 7, 5); +} +static inline unsigned GetCopOpc(uint32_t insn) { +  return slice(insn, 7, 4); +} +// Most of the operands are in immediate forms, except Rd and Rn, which are ARM +// core registers. +// +// CDP, CDP2:                cop opc1 CRd CRn CRm opc2 +// +// MCR, MCR2, MRC, MRC2:     cop opc1 Rd CRn CRm opc2 +// +// MCRR, MCRR2, MRRC, MRRc2: cop opc Rd Rn CRm +// +// LDC_OFFSET, LDC_PRE, LDC_POST: cop CRd Rn R0 [+/-]imm8:00 +// and friends +// STC_OFFSET, STC_PRE, STC_POST: cop CRd Rn R0 [+/-]imm8:00 +// and friends +//                                        <-- addrmode2 --> +// +// LDC_OPTION:                    cop CRd Rn imm8 +// and friends +// STC_OPTION:                    cop CRd Rn imm8 +// and friends +// +static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr"); + +  unsigned &OpIdx = NumOpsAdded; +  bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 || +                    Opcode == ARM::MRRC || Opcode == ARM::MRRC2); +  // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}). +  bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2); +  bool LdStCop = LdStCopOpcode(Opcode); + +  OpIdx = 0; + +  MI.addOperand(MCOperand::CreateImm(GetCoprocessor(insn))); + +  if (LdStCop) { +    // Unindex if P:W = 0b00 --> _OPTION variant +    unsigned PW = getPBit(insn) << 1 | getWBit(insn); + +    MI.addOperand(MCOperand::CreateImm(decodeRd(insn))); + +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); + +    if (PW) { +      MI.addOperand(MCOperand::CreateReg(0)); +      ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; +      unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2, +                                          ARM_AM::no_shift); +      MI.addOperand(MCOperand::CreateImm(Offset)); +      OpIdx = 5; +    } else { +      MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0))); +      OpIdx = 4; +    } +  } else { +    MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn) +                                                 : GetCopOpc1(insn, NoGPR))); + +    MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) +                        : MCOperand::CreateReg( +                            getRegisterEnum(ARM::GPRRegClassID, +                                            decodeRd(insn)))); + +    MI.addOperand(OneCopOpc ? MCOperand::CreateReg( +                                getRegisterEnum(ARM::GPRRegClassID, +                                                decodeRn(insn))) +                            : MCOperand::CreateImm(decodeRn(insn))); + +    MI.addOperand(MCOperand::CreateImm(decodeRm(insn))); + +    OpIdx = 5; + +    if (!OneCopOpc) { +      MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn))); +      ++OpIdx; +    } +  } + +  return true; +} + +// Branch Instructions. +// BLr9: SignExtend(Imm24:'00', 32) +// Bcc, BLr9_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 +// SMC: ZeroExtend(imm4, 32) +// SVC: ZeroExtend(Imm24, 32) +// +// Various coprocessor instructions are assigned BrFrm arbitrarily. +// Delegates to DisassembleCoprocessor() helper function. 
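For the CDP/CDP2 path handled above, every operand is an immediate pulled from a fixed field: cop = Inst{11-8}, opc1 = Inst{23-20}, CRd = Inst{15-12}, CRn = Inst{19-16}, CRm = Inst{3-0}, opc2 = Inst{7-5}. A standalone sketch of that extraction (the struct, function name, and sample word are illustrative, not from the patch):

#include <cstdint>
#include <cstdio>

struct CDPFields {
  unsigned cop, opc1, CRd, CRn, CRm, opc2;
};

// Field positions as used by GetCoprocessor/GetCopOpc1/GetCopOpc2 and the
// decodeRd/Rn/Rm helpers for the CDP form.
static CDPFields extractCDP(uint32_t insn) {
  CDPFields f;
  f.cop  = (insn >> 8)  & 0xF;   // Inst{11-8}
  f.opc1 = (insn >> 20) & 0xF;   // Inst{23-20}
  f.CRd  = (insn >> 12) & 0xF;   // Inst{15-12}
  f.CRn  = (insn >> 16) & 0xF;   // Inst{19-16}
  f.CRm  = insn & 0xF;           // Inst{3-0}
  f.opc2 = (insn >> 5)  & 0x7;   // Inst{7-5}
  return f;
}

int main() {
  CDPFields f = extractCDP(0xEE123446);        // arbitrary coprocessor-space word
  std::printf("p%u opc1=%u CRd=%u CRn=%u CRm=%u opc2=%u\n",
              f.cop, f.opc1, f.CRd, f.CRn, f.CRm, f.opc2);
  return 0;
}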
+// +// MRS/MRSsys: Rd +// MSR/MSRsys: Rm mask=Inst{19-16} +// BXJ:        Rm +// MSRi/MSRsysi: so_imm +// SRSW/SRS: addrmode4:$addr mode_imm +// RFEW/RFE: addrmode4:$addr Rn +static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  if (CoprocessorOpcode(Opcode)) +    return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  // MRS and MRSsys take one GPR reg Rd. +  if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) { +    assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)))); +    NumOpsAdded = 1; +    return true; +  } +  // BXJ takes one GPR reg Rm. +  if (Opcode == ARM::BXJ) { +    assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    NumOpsAdded = 1; +    return true; +  } +  // MSR and MSRsys take one GPR reg Rm, followed by the mask. +  if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) { +    assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16))); +    NumOpsAdded = 2; +    return true; +  } +  // MSRi and MSRsysi take one so_imm operand, followed by the mask. +  if (Opcode == ARM::MSRi || Opcode == ARM::MSRsysi) { +    // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. +    // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}. +    // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot(). +    unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF; +    unsigned Imm = insn & 0xFF; +    MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot))); +    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16))); +    NumOpsAdded = 2; +    return true; +  } +  // SRSW and SRS requires addrmode4:$addr for ${addr:submode}, followed by the +  // mode immediate (Inst{4-0}). +  if (Opcode == ARM::SRSW || Opcode == ARM::SRS || +      Opcode == ARM::RFEW || Opcode == ARM::RFE) { +    // ARMInstPrinter::printAddrMode4Operand() prints special mode string +    // if the base register is SP; so don't set ARM::SP. 
+    MI.addOperand(MCOperand::CreateReg(0)); +    ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); +    MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); + +    if (Opcode == ARM::SRSW || Opcode == ARM::SRS) +      MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); +    else +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                         decodeRn(insn)))); +    NumOpsAdded = 3; +    return true; +  } + +  assert((Opcode == ARM::Bcc || Opcode == ARM::BLr9 || Opcode == ARM::BLr9_pred +          || Opcode == ARM::SMC || Opcode == ARM::SVC) && +         "Unexpected Opcode"); + +  assert(NumOps >= 1 && OpInfo[0].RegClass == 0 && "Reg operand expected"); + +  int Imm32 = 0; +  if (Opcode == ARM::SMC) { +    // ZeroExtend(imm4, 32) where imm24 = Inst{3-0}. +    Imm32 = slice(insn, 3, 0); +  } else if (Opcode == ARM::SVC) { +    // ZeroExtend(imm24, 32) where imm24 = Inst{23-0}. +    Imm32 = slice(insn, 23, 0); +  } else { +    // SignExtend(imm24:'00', 32) where imm24 = Inst{23-0}. +    unsigned Imm26 = slice(insn, 23, 0) << 2; +    //Imm32 = signextend<signed int, 26>(Imm26); +    Imm32 = SignExtend32<26>(Imm26); + +    // When executing an ARM instruction, PC reads as the address of the current +    // instruction plus 8.  The assembler subtracts 8 from the difference +    // between the branch instruction and the target address, disassembler has +    // to add 8 to compensate. +    Imm32 += 8; +  } + +  MI.addOperand(MCOperand::CreateImm(Imm32)); +  NumOpsAdded = 1; + +  return true; +} + +// Misc. Branch Instructions. +// BR_JTadd, BR_JTr, BR_JTm +// BLXr9, BXr9 +// BRIND, BX_RET +static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  // BX_RET has only two predicate operands, do an early return. +  if (Opcode == ARM::BX_RET) +    return true; + +  // BLXr9 and BRIND take one GPR reg. +  if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) { +    assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    OpIdx = 1; +    return true; +  } + +  // BR_JTadd is an ADD with Rd = PC, (Rn, Rm) as the target and index regs. +  if (Opcode == ARM::BR_JTadd) { +    // InOperandList with GPR:$target and GPR:$idx regs. + +    assert(NumOps == 4 && "Expect 4 operands"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); + +    // Fill in the two remaining imm operands to signify build completion. +    MI.addOperand(MCOperand::CreateImm(0)); +    MI.addOperand(MCOperand::CreateImm(0)); + +    OpIdx = 4; +    return true; +  } + +  // BR_JTr is a MOV with Rd = PC, and Rm as the source register. +  if (Opcode == ARM::BR_JTr) { +    // InOperandList with GPR::$target reg. 
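Tying back to the branch case above: the displacement carried in the MCInst is SignExtend(imm24:'00', 32) plus 8, because PC reads as the current instruction's address plus 8 in ARM state. A standalone sketch of the same arithmetic, with a hand-rolled sign extension standing in for SignExtend32<26> (the sample word is a branch-to-self, included for illustration):

#include <cstdint>
#include <cstdio>

// Sign-extend the low `bits` bits of x to a full 32-bit signed value.
static int32_t signExtend(uint32_t x, unsigned bits) {
  uint32_t m = 1u << (bits - 1);
  return static_cast<int32_t>((x ^ m) - m);
}

int main() {
  uint32_t insn = 0xEBFFFFFE;                    // "BL ." style word, imm24 = 0xFFFFFE
  uint32_t imm26 = (insn & 0x00FFFFFF) << 2;     // imm24:'00'
  int32_t offset = signExtend(imm26, 26) + 8;    // +8: PC reads as addr + 8
  std::printf("branch offset = %d\n", (int)offset);  // prints 0: branch-to-self
  return 0;
}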
+ +    assert(NumOps == 3 && "Expect 3 operands"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); + +    // Fill in the two remaining imm operands to signify build completion. +    MI.addOperand(MCOperand::CreateImm(0)); +    MI.addOperand(MCOperand::CreateImm(0)); + +    OpIdx = 3; +    return true; +  } + +  // BR_JTm is an LDR with Rt = PC. +  if (Opcode == ARM::BR_JTm) { +    // This is the reg/reg form, with base reg followed by +/- reg shop imm. +    // See also ARMAddressingModes.h (Addressing Mode #2). + +    assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); + +    ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + +    // Disassemble the offset reg (Rm), shift type, and immediate shift length. +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    // Inst{6-5} encodes the shift opcode. +    ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); +    // Inst{11-7} encodes the imm5 shift amount. +    unsigned ShImm = slice(insn, 11, 7); + +    // A8.4.1.  Possible rrx or shift amount of 32... +    getImmShiftSE(ShOp, ShImm); +    MI.addOperand(MCOperand::CreateImm( +                    ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); + +    // Fill in the two remaining imm operands to signify build completion. +    MI.addOperand(MCOperand::CreateImm(0)); +    MI.addOperand(MCOperand::CreateImm(0)); + +    OpIdx = 5; +    return true; +  } + +  assert(0 && "Unexpected BrMiscFrm Opcode"); +  return false; +} + +static inline uint32_t getBFCInvMask(uint32_t insn) { +  uint32_t lsb = slice(insn, 11, 7); +  uint32_t msb = slice(insn, 20, 16); +  uint32_t Val = 0; +  assert(lsb <= msb && "Encoding error: lsb > msb"); +  for (uint32_t i = lsb; i <= msb; ++i) +    Val |= (1 << i); +  return ~Val; +} + +static inline bool SaturateOpcode(unsigned Opcode) { +  switch (Opcode) { +  case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16: +  case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16: +    return true; +  default: +    return false; +  } +} + +static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { +  switch (Opcode) { +  case ARM::SSATlsl: +  case ARM::SSATasr: +    return slice(insn, 20, 16) + 1; +  case ARM::SSAT16: +    return slice(insn, 19, 16) + 1; +  case ARM::USATlsl: +  case ARM::USATasr: +    return slice(insn, 20, 16); +  case ARM::USAT16: +    return slice(insn, 19, 16); +  default: +    assert(0 && "Invalid opcode passed in"); +    return 0; +  } +} + +// A major complication is the fact that some of the saturating add/subtract +// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. +// They are QADD, QDADD, QDSUB, and QSUB. +static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  unsigned short NumDefs = TID.getNumDefs(); +  bool isUnary = isUnaryDP(TID.TSFlags); +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  // Disassemble register def if there is one. 
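getBFCInvMask above turns the lsb (Inst{11-7}) and msb (Inst{20-16}) fields of BFC/BFI into the inverted field mask that the MCInst operand carries. A standalone equivalent that builds the mask arithmetically instead of with a loop, giving the same result for any valid lsb <= msb (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Equivalent of getBFCInvMask: set bits lsb..msb, then invert.
static uint32_t bfcInvMask(uint32_t insn) {
  uint32_t lsb = (insn >> 7) & 0x1F;    // Inst{11-7}
  uint32_t msb = (insn >> 16) & 0x1F;   // Inst{20-16}
  assert(lsb <= msb && "Encoding error: lsb > msb");
  uint32_t width = msb - lsb + 1;
  uint32_t field = (width == 32) ? ~0u : (((1u << width) - 1u) << lsb);
  return ~field;
}

int main() {
  // lsb = 4, msb = 11 clears an 8-bit field: mask = 0xFFFFF00F.
  uint32_t insn = (11u << 16) | (4u << 7);
  std::printf("0x%08X\n", (unsigned)bfcInvMask(insn));
  return 0;
}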
+  if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)))); +    ++OpIdx; +  } + +  // Now disassemble the src operands. +  if (OpIdx >= NumOps) +    return false; + +  // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd. +  if (SaturateOpcode(Opcode)) { +    MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); + +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); + +    if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { +      OpIdx += 2; +      return true; +    } + +    // For SSAT operand reg (Rm) has been disassembled above. +    // Now disassemble the shift amount. + +    // Inst{11-7} encodes the imm5 shift amount. +    unsigned ShAmt = slice(insn, 11, 7); + +    // A8.6.183.  Possible ASR shift amount of 32... +    if (Opcode == ARM::SSATasr && ShAmt == 0) +      ShAmt = 32; + +    MI.addOperand(MCOperand::CreateImm(ShAmt)); + +    OpIdx += 3; +    return true; +  } + +  // Special-case handling of BFC/BFI/SBFX/UBFX. +  if (Opcode == ARM::BFC || Opcode == ARM::BFI) { +    // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. +    MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 +                                       : getRegisterEnum(ARM::GPRRegClassID, +                                                         decodeRm(insn)))); +    MI.addOperand(MCOperand::CreateImm(getBFCInvMask(insn))); +    OpIdx += 2; +    return true; +  } +  if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); +    MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1)); +    OpIdx += 3; +    return true; +  } + +  bool RmRn = (Opcode == ARM::QADD || Opcode == ARM::QDADD || +               Opcode == ARM::QDSUB || Opcode == ARM::QSUB); + +  // BinaryDP has an Rn operand. +  if (!isUnary) { +    assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg( +                    getRegisterEnum(ARM::GPRRegClassID, +                                    RmRn ? decodeRm(insn) : decodeRn(insn)))); +    ++OpIdx; +  } + +  // If this is a two-address operand, skip it, e.g., MOVCCr operand 1. +  if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) { +    MI.addOperand(MCOperand::CreateReg(0)); +    ++OpIdx; +  } + +  // Now disassemble operand 2. +  if (OpIdx >= NumOps) +    return false; + +  if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { +    // We have a reg/reg form. +    // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are +    // routed here as well. +    // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); +    MI.addOperand(MCOperand::CreateReg( +                    getRegisterEnum(ARM::GPRRegClassID, +                                    RmRn? decodeRn(insn) : decodeRm(insn)))); +    ++OpIdx; +  } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { +    // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). 
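The two immediate shapes handled just below are worth spelling out: MOVi16/MOVTi16 carry a plain imm16 assembled as Inst{19-16}:Inst{11-0}, while the ordinary data-processing reg/imm form carries an 8-bit value rotated right by twice Inst{11-8} (A5.2.4). A standalone sketch of both decodings, with rotr32 reimplemented locally (illustrative only):

#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t v, unsigned amt) {
  amt &= 31;
  return amt ? (v >> amt) | (v << (32 - amt)) : v;
}

// MOVi16/MOVTi16: imm16 = Inst{19-16}:Inst{11-0}.
static uint32_t decodeImm16(uint32_t insn) {
  return ((insn >> 16) & 0xF) << 12 | (insn & 0xFFF);
}

// Ordinary so_imm: 8-bit value Inst{7-0} rotated right by 2 * Inst{11-8}.
static uint32_t decodeSOImm(uint32_t insn) {
  unsigned rot = (insn >> 8) & 0xF;
  return rotr32(insn & 0xFF, 2 * rot);
}

int main() {
  std::printf("0x%X\n", (unsigned)decodeSOImm(0x000004FF)); // 0xFF ror 8 = 0xFF000000
  std::printf("0x%X\n", (unsigned)decodeImm16(0x000A0BCD)); // imm4=0xA imm12=0xBCD -> 0xABCD
  return 0;
}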
+    assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); +    unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); +    MI.addOperand(MCOperand::CreateImm(Imm16)); +    ++OpIdx; +  } else { +    // We have a reg/imm form. +    // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. +    // A5.2.4 Rotate amount is twice the numeric value of Inst{11-8}. +    // See also ARMAddressingModes.h: getSOImmValImm() and getSOImmValRot(). +    assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); +    unsigned Rot = (insn >> ARMII::SoRotImmShift) & 0xF; +    unsigned Imm = insn & 0xFF; +    MI.addOperand(MCOperand::CreateImm(ARM_AM::rotr32(Imm, 2*Rot))); +    ++OpIdx; +  } + +  return true; +} + +static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  unsigned short NumDefs = TID.getNumDefs(); +  bool isUnary = isUnaryDP(TID.TSFlags); +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  // Disassemble register def if there is one. +  if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)))); +    ++OpIdx; +  } + +  // Disassemble the src operands. +  if (OpIdx >= NumOps) +    return false; + +  // BinaryDP has an Rn operand. +  if (!isUnary) { +    assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  // If this is a two-address operand, skip it, e.g., MOVCCs operand 1. +  if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) { +    MI.addOperand(MCOperand::CreateReg(0)); +    ++OpIdx; +  } + +  // Disassemble operand 2, which consists of three components. +  if (OpIdx + 2 >= NumOps) +    return false; + +  assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && +         (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) && +         (OpInfo[OpIdx+2].RegClass == 0) && +         "Expect 3 reg operands"); + +  // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. +  unsigned Rs = slice(insn, 4, 4); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRm(insn)))); +  if (Rs) { +    // Register-controlled shifts: [Rm, Rs, shift]. +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRs(insn)))); +    // Inst{6-5} encodes the shift opcode. +    ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); +    MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, 0))); +  } else { +    // Constant shifts: [Rm, reg0, shift_imm]. +    MI.addOperand(MCOperand::CreateReg(0)); // NoRegister +    // Inst{6-5} encodes the shift opcode. +    ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); +    // Inst{11-7} encodes the imm5 shift amount. +    unsigned ShImm = slice(insn, 11, 7); + +    // A8.4.1.  Possible rrx or shift amount of 32... 
+    getImmShiftSE(ShOp, ShImm); +    MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShImm))); +  } +  OpIdx += 3; + +  return true; +} + +static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  unsigned short NumDefs = TID.getNumDefs(); +  bool isPrePost = isPrePostLdSt(TID.TSFlags); +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) +         && "Invalid arguments"); + +  // Operand 0 of a pre- and post-indexed store is the address base writeback. +  if (isPrePost && isStore) { +    assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  // Disassemble the dst/src operand. +  if (OpIdx >= NumOps) +    return false; + +  assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +         "Reg operand expected"); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  ++OpIdx; + +  // After dst of a pre- and post-indexed load is the address base writeback. +  if (isPrePost && !isStore) { +    assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  // Disassemble the base operand. +  if (OpIdx >= NumOps) +    return false; + +  assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +         "Reg operand expected"); +  assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) +         && "Index mode or tied_to operand expected"); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  ++OpIdx; + +  // For reg/reg form, base reg is followed by +/- reg shop imm. +  // For immediate form, it is followed by +/- imm12. +  // See also ARMAddressingModes.h (Addressing Mode #2). +  if (OpIdx + 1 >= NumOps) +    return false; + +  assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && +         (OpInfo[OpIdx+1].RegClass == 0) && +         "Expect 1 reg operand followed by 1 imm operand"); + +  ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; +  if (getIBit(insn) == 0) { +    MI.addOperand(MCOperand::CreateReg(0)); + +    // Disassemble the 12-bit immediate offset. +    unsigned Imm12 = slice(insn, 11, 0); +    unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift); +    MI.addOperand(MCOperand::CreateImm(Offset)); +  } else { +    // Disassemble the offset reg (Rm), shift type, and immediate shift length. +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    // Inst{6-5} encodes the shift opcode. +    ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); +    // Inst{11-7} encodes the imm5 shift amount. +    unsigned ShImm = slice(insn, 11, 7); + +    // A8.4.1.  Possible rrx or shift amount of 32... 
+    getImmShiftSE(ShOp, ShImm); +    MI.addOperand(MCOperand::CreateImm( +                    ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); +  } +  OpIdx += 2; + +  return true; +} + +static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { +  return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); +} + +static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { +  return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); +} + +static bool HasDualReg(unsigned Opcode) { +  switch (Opcode) { +  default: +    return false; +  case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: +  case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST: +    return true; +  }   +} + +static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  unsigned short NumDefs = TID.getNumDefs(); +  bool isPrePost = isPrePostLdSt(TID.TSFlags); +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) +         && "Invalid arguments"); + +  // Operand 0 of a pre- and post-indexed store is the address base writeback. +  if (isPrePost && isStore) { +    assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  bool DualReg = HasDualReg(Opcode); + +  // Disassemble the dst/src operand. +  if (OpIdx >= NumOps) +    return false; + +  assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +         "Reg operand expected"); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  ++OpIdx; + +  // Fill in LDRD and STRD's second operand. +  if (DualReg) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn) + 1))); +    ++OpIdx; +  } + +  // After dst of a pre- and post-indexed load is the address base writeback. +  if (isPrePost && !isStore) { +    assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           "Reg operand expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  // Disassemble the base operand. +  if (OpIdx >= NumOps) +    return false; + +  assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +         "Reg operand expected"); +  assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) +         && "Index mode or tied_to operand expected"); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  ++OpIdx; + +  // For reg/reg form, base reg is followed by +/- reg. +  // For immediate form, it is followed by +/- imm8. +  // See also ARMAddressingModes.h (Addressing Mode #3). 
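For the addressing-mode-3 immediate form mentioned above, the 8-bit offset is stored split across Inst{11-8} (high nibble) and Inst{3-0} (low nibble), with the U bit (Inst{23}) choosing add or subtract. A standalone sketch of reassembling it; the ARM_AM::getAM3Opc packing used by the patch is not reproduced here, and the sample word is illustrative:

#include <cstdint>
#include <cstdio>

// AM3 immediate offset: imm8 = Inst{11-8}:Inst{3-0}; U = Inst{23} gives the sign.
static int32_t am3ImmOffset(uint32_t insn) {
  uint32_t imm4H = (insn >> 8) & 0xF;
  uint32_t imm4L = insn & 0xF;
  int32_t offset = static_cast<int32_t>((imm4H << 4) | imm4L);
  bool add = (insn >> 23) & 1;              // U bit
  return add ? offset : -offset;
}

int main() {
  uint32_t insn = (1u << 23) | (0x3u << 8) | 0x4u;   // U=1, imm8 = 0x34
  std::printf("%d\n", (int)am3ImmOffset(insn));      // prints 52
  return 0;
}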
+  if (OpIdx + 1 >= NumOps) +    return false; + +  assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && +         (OpInfo[OpIdx+1].RegClass == 0) && +         "Expect 1 reg operand followed by 1 imm operand"); + +  ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; +  if (getAM3IBit(insn) == 1) { +    MI.addOperand(MCOperand::CreateReg(0)); + +    // Disassemble the 8-bit immediate offset. +    unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF; +    unsigned Imm4L = insn & 0xF; +    unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L); +    MI.addOperand(MCOperand::CreateImm(Offset)); +  } else { +    // Disassemble the offset reg (Rm). +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); +    MI.addOperand(MCOperand::CreateImm(Offset)); +  } +  OpIdx += 2; + +  return true; +} + +static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { +  return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); +} + +static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { +  return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); +} + +// The algorithm for disassembly of LdStMulFrm is different from others because +// it explicitly populates the two predicate operands after operand 0 (the base) +// and operand 1 (the AM4 mode imm).  After operand 3, we need to populate the +// reglist with each affected register encoded as an MCOperand. +static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); + +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + +  // Writeback to base, if necessary. +  if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) { +    MI.addOperand(MCOperand::CreateReg(Base)); +    ++OpIdx; +  } + +  MI.addOperand(MCOperand::CreateReg(Base)); + +  ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); +  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); + +  // Handling the two predicate operands before the reglist. +  int64_t CondVal = insn >> ARMII::CondShift; +  MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal)); +  MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); + +  OpIdx += 4; + +  // Fill the variadic part of reglist. 
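+  // Each set bit of Inst{15-0} selects one GPR, scanned from R0 upwards; e.g.
+  // a register_list of 0x0106 (bits 1, 2, and 8) appends R1, R2, and R8.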
+  unsigned RegListBits = insn & ((1 << 16) - 1); +  for (unsigned i = 0; i < 16; ++i) { +    if ((RegListBits >> i) & 1) { +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                         i))); +      ++OpIdx; +    } +  } + +  return true; +} + +// LDREX, LDREXB, LDREXH: Rd Rn +// LDREXD:                Rd Rd+1 Rn +// STREX, STREXB, STREXH: Rd Rm Rn +// STREXD:                Rd Rm Rm+1 Rn +// +// SWP, SWPB:             Rd Rm Rn +static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 2 +         && OpInfo[0].RegClass == ARM::GPRRegClassID +         && OpInfo[1].RegClass == ARM::GPRRegClassID +         && "Expect 2 reg operands"); + +  bool isStore = slice(insn, 20, 20) == 0; +  bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD); + +  // Add the destination operand. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  ++OpIdx; + +  // Store register Exclusive needs a source operand. +  if (isStore) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); +    ++OpIdx; + +    if (isDW) { +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                         decodeRm(insn)+1))); +      ++OpIdx; +    } +  } else if (isDW) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)+1))); +    ++OpIdx; +  } + +  // Finally add the pointer operand. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  ++OpIdx; + +  return true; +} + +// Misc. Arithmetic Instructions. +// CLZ: Rd Rm +// PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5 +// RBIT, REV, REV16, REVSH: Rd Rm +static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 2 +         && OpInfo[0].RegClass == ARM::GPRRegClassID +         && OpInfo[1].RegClass == ARM::GPRRegClassID +         && "Expect 2 reg operands"); + +  bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  ++OpIdx; + +  if (ThreeReg) { +    assert(NumOps >= 4 && "Expect >= 4 operands"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRm(insn)))); +  ++OpIdx; + +  // If there is still an operand info left which is an immediate operand, add +  // an additional imm5 LSL/ASR operand. 
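+  // For example, PKHTB Rd, Rn, Rm, ASR #16 carries the shift amount 16 in
+  // Inst{11-7}, which is appended here as the trailing imm operand.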
+  if (ThreeReg && OpInfo[OpIdx].RegClass == 0
+      && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+    // Extract the 5-bit immediate field Inst{11-7}.
+    unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
+    MI.addOperand(MCOperand::CreateImm(ShiftAmt));
+    ++OpIdx;
+  }
+
+  return true;
+}
+
+// Extend instructions.
+// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
+// The 2nd operand register is Rn and the 3rd operand register is Rm for the
+// three register operand form.  Otherwise, Rn=0b1111 and only Rm is used.
+static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  unsigned &OpIdx = NumOpsAdded;
+
+  OpIdx = 0;
+
+  assert(NumOps >= 2
+         && OpInfo[0].RegClass == ARM::GPRRegClassID
+         && OpInfo[1].RegClass == ARM::GPRRegClassID
+         && "Expect 2 reg operands");
+
+  bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                     decodeRd(insn))));
+  ++OpIdx;
+
+  if (ThreeReg) {
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                       decodeRn(insn))));
+    ++OpIdx;
+  }
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                     decodeRm(insn))));
+  ++OpIdx;
+
+  // If there is still an operand info left which is an immediate operand, add
+  // an additional rotate immediate operand.
+  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+      && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+    // Extract the 2-bit rotate field Inst{11-10}.
+    unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
+    // Rotation by 8, 16, or 24 bits.
+    MI.addOperand(MCOperand::CreateImm(rot << 3));
+    ++OpIdx;
+  }
+
+  return true;
+}
+
+/////////////////////////////////////
+//                                 //
+//    Utility Functions For VFP    //
+//                                 //
+/////////////////////////////////////
+
+// Extract/Decode Dd/Sd:
+//
+// SP => d = UInt(Vd:D)
+// DP => d = UInt(D:Vd)
+static unsigned decodeVFPRd(uint32_t insn, bool isSPVFP) {
+  return isSPVFP ? (decodeRd(insn) << 1 | getDBit(insn))
+                 : (decodeRd(insn) | getDBit(insn) << 4);
+}
+
+// Extract/Decode Dn/Sn:
+//
+// SP => n = UInt(Vn:N)
+// DP => n = UInt(N:Vn)
+static unsigned decodeVFPRn(uint32_t insn, bool isSPVFP) {
+  return isSPVFP ? (decodeRn(insn) << 1 | getNBit(insn))
+                 : (decodeRn(insn) | getNBit(insn) << 4);
+}
+
+// Extract/Decode Dm/Sm:
+//
+// SP => m = UInt(Vm:M)
+// DP => m = UInt(M:Vm)
+static unsigned decodeVFPRm(uint32_t insn, bool isSPVFP) {
+  return isSPVFP ?
(decodeRm(insn) << 1 | getMBit(insn)) +                 : (decodeRm(insn) | getMBit(insn) << 4); +} + +// A7.5.1 +#if 0 +static uint64_t VFPExpandImm(unsigned char byte, unsigned N) { +  assert(N == 32 || N == 64); + +  uint64_t Result; +  unsigned bit6 = slice(byte, 6, 6); +  if (N == 32) { +    Result = slice(byte, 7, 7) << 31 | slice(byte, 5, 0) << 19; +    if (bit6) +      Result |= 0x1f << 25; +    else +      Result |= 0x1 << 30; +  } else { +    Result = (uint64_t)slice(byte, 7, 7) << 63 | +             (uint64_t)slice(byte, 5, 0) << 48; +    if (bit6) +      Result |= 0xffL << 54; +    else +      Result |= 0x1L << 62; +  } +  return Result; +} +#endif + +// VFP Unary Format Instructions: +// +// VCMP[E]ZD, VCMP[E]ZS: compares one floating-point register with zero +// VCVTDS, VCVTSD: converts between double-precision and single-precision +// The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers. +static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  unsigned RegClass = OpInfo[OpIdx].RegClass; +  assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && +         "Reg operand expected"); +  bool isSP = (RegClass == ARM::SPRRegClassID); + +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); +  ++OpIdx; + +  // Early return for compare with zero instructions. +  if (Opcode == ARM::VCMPEZD || Opcode == ARM::VCMPEZS +      || Opcode == ARM::VCMPZD || Opcode == ARM::VCMPZS) +    return true; + +  RegClass = OpInfo[OpIdx].RegClass; +  assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && +         "Reg operand expected"); +  isSP = (RegClass == ARM::SPRRegClassID); + +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); +  ++OpIdx; + +  return true; +} + +// All the instructions have homogeneous [VFP]Rd, [VFP]Rn, and [VFP]Rm regs. +// Some of them have operand constraints which tie the first operand in the +// InOperandList to that of the dst.  As far as asm printing is concerned, this +// tied_to operand is simply skipped. +static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  unsigned RegClass = OpInfo[OpIdx].RegClass; +  assert((RegClass == ARM::SPRRegClassID || RegClass == ARM::DPRRegClassID) && +         "Reg operand expected"); +  bool isSP = (RegClass == ARM::SPRRegClassID); + +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); +  ++OpIdx; + +  // Skip tied_to operand constraint. 
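+  // (For example, the VFP multiply-accumulate instructions tie the accumulator
+  // source to the destination register; the zero-register placeholder added
+  // below keeps the operand indices in line, and the printer skips it.)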
+  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+    assert(NumOps >= 4 && "Expect >=4 operands");
+    MI.addOperand(MCOperand::CreateReg(0));
+    ++OpIdx;
+  }
+
+  MI.addOperand(MCOperand::CreateReg(
+                  getRegisterEnum(RegClass, decodeVFPRn(insn, isSP))));
+  ++OpIdx;
+
+  MI.addOperand(MCOperand::CreateReg(
+                  getRegisterEnum(RegClass, decodeVFPRm(insn, isSP))));
+  ++OpIdx;
+
+  return true;
+}
+
+// A8.6.295 vcvt (floating-point <-> integer)
+// Int to FP: VSITOD, VSITOS, VUITOD, VUITOS
+// FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S
+//
+// A8.6.297 vcvt (floating-point and fixed-point)
+// Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i))
+static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+  assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
+
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+  bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
+  bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297
+  unsigned RegClassID = SP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
+
+  if (fixed_point) {
+    // A8.6.297
+    assert(NumOps >= 3 && "Expect >= 3 operands");
+    int size = slice(insn, 7, 7) == 0 ? 16 : 32;
+    int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5));
+    MI.addOperand(MCOperand::CreateReg(
+                    getRegisterEnum(RegClassID,
+                                    decodeVFPRd(insn, SP))));
+
+    assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+           "Tied to operand expected");
+    MI.addOperand(MI.getOperand(0));
+
+    assert(OpInfo[2].RegClass == 0 && !OpInfo[2].isPredicate() &&
+           !OpInfo[2].isOptionalDef() && "Imm operand expected");
+    MI.addOperand(MCOperand::CreateImm(fbits));
+
+    NumOpsAdded = 3;
+  } else {
+    // A8.6.295
+    // The Rd (destination) and Rm (source) bits have different interpretations
+    // depending on whether the operation is single precision.
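+    // For example, VCVT.S32.F64 Sd, Dm is a to_integer conversion
+    // (Inst{18} = 1): the destination is always a single-precision Sd, while
+    // the source Dm follows the operation's precision; int-to-FP is the mirror
+    // image.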
+    unsigned d, m; +    if (slice(insn, 18, 18) == 1) { // to_integer operation +      d = decodeVFPRd(insn, true /* Is Single Precision */); +      MI.addOperand(MCOperand::CreateReg( +                      getRegisterEnum(ARM::SPRRegClassID, d))); +      m = decodeVFPRm(insn, SP); +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, m))); +    } else { +      d = decodeVFPRd(insn, SP); +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, d))); +      m = decodeVFPRm(insn, true /* Is Single Precision */); +      MI.addOperand(MCOperand::CreateReg( +                      getRegisterEnum(ARM::SPRRegClassID, m))); +    } +    NumOpsAdded = 2; +  } + +  return true; +} + +// VMOVRS - A8.6.330 +// Rt => Rd; Sn => UInt(Vn:N) +static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2"); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, +                                                     decodeVFPRn(insn, true)))); +  NumOpsAdded = 2; +  return true; +} + +// VMOVRRD - A8.6.332 +// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm) +// +// VMOVRRS - A8.6.331 +// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 +static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  OpIdx = 2; + +  if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { +    unsigned Sm = decodeVFPRm(insn, true); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, +                                                       Sm))); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, +                                                       Sm+1))); +    OpIdx += 2; +  } else { +    MI.addOperand(MCOperand::CreateReg( +                    getRegisterEnum(ARM::DPRRegClassID, +                                    decodeVFPRm(insn, false)))); +    ++OpIdx; +  } +  return true; +} + +// VMOVSR - A8.6.330 +// Rt => Rd; Sn => UInt(Vn:N) +static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2"); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, +                                                     decodeVFPRn(insn, true)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  NumOpsAdded = 2; +  return true; +} + +// VMOVDRR - A8.6.332 +// Rt => Rd; Rt2 => Rn; Dm => UInt(M:Vm) +// +// VMOVRRS - A8.6.331 +// Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 +static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, 
BO) { + +  assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { +    unsigned Sm = decodeVFPRm(insn, true); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, +                                                       Sm))); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, +                                                       Sm+1))); +    OpIdx += 2; +  } else { +    MI.addOperand(MCOperand::CreateReg( +                    getRegisterEnum(ARM::DPRRegClassID, +                                    decodeVFPRm(insn, false)))); +    ++OpIdx; +  } + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  OpIdx += 2; +  return true; +} + +// VFP Load/Store Instructions. +// VLDRD, VLDRS, VSTRD, VSTRS +static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); + +  bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false; +  unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; + +  // Extract Dd/Sd for operand 0. +  unsigned RegD = decodeVFPRd(insn, isSPVFP); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, RegD))); + +  unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); +  MI.addOperand(MCOperand::CreateReg(Base)); + +  // Next comes the AM5 Opcode. +  ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; +  unsigned char Imm8 = insn & 0xFF; +  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(AddrOpcode, Imm8))); + +  NumOpsAdded = 3; + +  return true; +} + +// VFP Load/Store Multiple Instructions. +// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and +// operand 1 (the AM5 mode imm) is followed by two predicate operands.  It is +// followed by a reglist of either DPR(s) or SPR(s). +// +// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] +static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); + +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + +  // Writeback to base, if necessary. +  if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD || +      Opcode == ARM::VSTMD_UPD || Opcode == ARM::VSTMS_UPD) { +    MI.addOperand(MCOperand::CreateReg(Base)); +    ++OpIdx; +  } + +  MI.addOperand(MCOperand::CreateReg(Base)); + +  // Next comes the AM5 Opcode. +  ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); +  unsigned char Imm8 = insn & 0xFF; +  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); + +  // Handling the two predicate operands before the reglist. +  int64_t CondVal = insn >> ARMII::CondShift; +  MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 
0xE : CondVal)); +  MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); + +  OpIdx += 4; + +  bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD || +     Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false; +  unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; + +  // Extract Dd/Sd. +  unsigned RegD = decodeVFPRd(insn, isSPVFP); + +  // Fill the variadic part of reglist. +  unsigned Regs = isSPVFP ? Imm8 : Imm8/2; +  for (unsigned i = 0; i < Regs; ++i) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, +                                                       RegD + i))); +    ++OpIdx; +  } + +  return true; +} + +// Misc. VFP Instructions. +// FMSTAT (vmrs with Rt=0b1111, i.e., to apsr_nzcv and no register operand) +// FCONSTD (DPR and a VFPf64Imm operand) +// FCONSTS (SPR and a VFPf32Imm operand) +// VMRS/VMSR (GPR operand) +static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  if (Opcode == ARM::FMSTAT) +    return true; + +  assert(NumOps >= 2 && "VFPMiscFrm expects >=2 operands"); + +  unsigned RegEnum = 0; +  switch (OpInfo[0].RegClass) { +  case ARM::DPRRegClassID: +    RegEnum = getRegisterEnum(ARM::DPRRegClassID, decodeVFPRd(insn, false)); +    break; +  case ARM::SPRRegClassID: +    RegEnum = getRegisterEnum(ARM::SPRRegClassID, decodeVFPRd(insn, true)); +    break; +  case ARM::GPRRegClassID: +    RegEnum = getRegisterEnum(ARM::GPRRegClassID, decodeRd(insn)); +    break; +  default: +    assert(0 && "Invalid reg class id"); +    return false; +  } + +  MI.addOperand(MCOperand::CreateReg(RegEnum)); +  ++OpIdx; + +  // Extract/decode the f64/f32 immediate. +  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 +        && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { +    // The asm syntax specifies the before-expanded <imm>. +    // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), +    //                  Opcode == ARM::FCONSTD ? 64 : 32) +    MI.addOperand(MCOperand::CreateImm(slice(insn,19,16)<<4 | slice(insn,3,0))); +    ++OpIdx; +  } + +  return true; +} + +// DisassembleThumbFrm() is defined in ThumbDisassemblerCore.h file. +#include "ThumbDisassemblerCore.h" + +///////////////////////////////////////////////////// +//                                                 // +//     Utility Functions For ARM Advanced SIMD     // +//                                                 // +///////////////////////////////////////////////////// + +// The following NEON namings are based on A8.6.266 VABA, VABAL.  Notice that +// A8.6.303 VDUP (ARM core register)'s D/Vd pair is the N/Vn pair of VABA/VABAL. + +// A7.3 Register encoding + +// Extract/Decode NEON D/Vd: +// +// Note that for quadword, Qd = UInt(D:Vd<3:1>) = Inst{22:15-13}, whereas for +// doubleword, Dd = UInt(D:Vd).  We compensate for this difference by +// handling it in the getRegisterEnum() utility function. +// D = Inst{22}, Vd = Inst{15-12} +static unsigned decodeNEONRd(uint32_t insn) { +  return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4 +    | (insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask; +} + +// Extract/Decode NEON N/Vn: +// +// Note that for quadword, Qn = UInt(N:Vn<3:1>) = Inst{7:19-17}, whereas for +// doubleword, Dn = UInt(N:Vn).  
We compensate for this difference by +// handling it in the getRegisterEnum() utility function. +// N = Inst{7}, Vn = Inst{19-16} +static unsigned decodeNEONRn(uint32_t insn) { +  return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4 +    | (insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask; +} + +// Extract/Decode NEON M/Vm: +// +// Note that for quadword, Qm = UInt(M:Vm<3:1>) = Inst{5:3-1}, whereas for +// doubleword, Dm = UInt(M:Vm).  We compensate for this difference by +// handling it in the getRegisterEnum() utility function. +// M = Inst{5}, Vm = Inst{3-0} +static unsigned decodeNEONRm(uint32_t insn) { +  return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4 +    | (insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask; +} + +namespace { +enum ElemSize { +  ESizeNA = 0, +  ESize8 = 8, +  ESize16 = 16, +  ESize32 = 32, +  ESize64 = 64 +}; +} // End of unnamed namespace + +// size        field -> Inst{11-10} +// index_align field -> Inst{7-4} +// +// The Lane Index interpretation depends on the Data Size: +//   8  (encoded as size = 0b00) -> Index = index_align[3:1] +//   16 (encoded as size = 0b01) -> Index = index_align[3:2] +//   32 (encoded as size = 0b10) -> Index = index_align[3] +// +// Ref: A8.6.317 VLD4 (single 4-element structure to one lane). +static unsigned decodeLaneIndex(uint32_t insn) { +  unsigned size = insn >> 10 & 3; +  assert((size == 0 || size == 1 || size == 2) && +         "Encoding error: size should be either 0, 1, or 2"); + +  unsigned index_align = insn >> 4 & 0xF; +  return (index_align >> 1) >> size; +} + +// imm64 = AdvSIMDExpandImm(op, cmode, i:imm3:imm4) +// op = Inst{5}, cmode = Inst{11-8} +// i = Inst{24} (ARM architecture) +// imm3 = Inst{18-16}, imm4 = Inst{3-0} +// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions. +static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { +  unsigned char cmode = (insn >> 8) & 0xF; +  unsigned char Imm8 = ((insn >> 24) & 1) << 7 | +                       ((insn >> 16) & 7) << 4 | +                       (insn & 0xF); +  uint64_t Imm64 = 0; + +  switch (esize) { +  case ESize8: +    Imm64 = Imm8; +    break; +  case ESize16: +    Imm64 = Imm8 << 8*(cmode >> 1 & 1); +    break; +  case ESize32: { +    if (cmode == 12) +      Imm64 = (Imm8 << 8) | 0xFF; +    else if (cmode == 13) +      Imm64 = (Imm8 << 16) | 0xFFFF; +    else { +      // Imm8 to be shifted left by how many bytes... 
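+      // (cmode >> 1) & 3 gives the byte position; e.g. cmode = 0b0100 places
+      // Imm8 in bits 23-16, i.e. a left shift of 16 (see Table A7-15).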
+      Imm64 = Imm8 << 8*(cmode >> 1 & 3); +    } +    break; +  } +  case ESize64: { +    for (unsigned i = 0; i < 8; ++i) +      if ((Imm8 >> i) & 1) +        Imm64 |= 0xFF << 8*i; +    break; +  } +  default: +    assert(0 && "Unreachable code!"); +    return 0; +  } + +  return Imm64; +} + +// A8.6.339 VMUL, VMULL (by scalar) +// ESize16 => m = Inst{2-0} (Vm<2:0>) D0-D7 +// ESize32 => m = Inst{3-0} (Vm<3:0>) D0-D15 +static unsigned decodeRestrictedDm(uint32_t insn, ElemSize esize) { +  switch (esize) { +  case ESize16: +    return insn & 7; +  case ESize32: +    return insn & 0xF; +  default: +    assert(0 && "Unreachable code!"); +    return 0; +  } +} + +// A8.6.339 VMUL, VMULL (by scalar) +// ESize16 => index = Inst{5:3} (M:Vm<3>) D0-D7 +// ESize32 => index = Inst{5}   (M)       D0-D15 +static unsigned decodeRestrictedDmIndex(uint32_t insn, ElemSize esize) { +  switch (esize) { +  case ESize16: +    return (((insn >> 5) & 1) << 1) | ((insn >> 3) & 1); +  case ESize32: +    return (insn >> 5) & 1; +  default: +    assert(0 && "Unreachable code!"); +    return 0; +  } +} + +// A8.6.296 VCVT (between floating-point and fixed-point, Advanced SIMD) +// (64 - <fbits>) is encoded as imm6, i.e., Inst{21-16}. +static unsigned decodeVCVTFractionBits(uint32_t insn) { +  return 64 - ((insn >> 16) & 0x3F); +} + +// A8.6.302 VDUP (scalar) +// ESize8  => index = Inst{19-17} +// ESize16 => index = Inst{19-18} +// ESize32 => index = Inst{19} +static unsigned decodeNVLaneDupIndex(uint32_t insn, ElemSize esize) { +  switch (esize) { +  case ESize8: +    return (insn >> 17) & 7; +  case ESize16: +    return (insn >> 18) & 3; +  case ESize32: +    return (insn >> 19) & 1; +  default: +    assert(0 && "Unspecified element size!"); +    return 0; +  } +} + +// A8.6.328 VMOV (ARM core register to scalar) +// A8.6.329 VMOV (scalar to ARM core register) +// ESize8  => index = Inst{21:6-5} +// ESize16 => index = Inst{21:6} +// ESize32 => index = Inst{21} +static unsigned decodeNVLaneOpIndex(uint32_t insn, ElemSize esize) { +  switch (esize) { +  case ESize8: +    return ((insn >> 21) & 1) << 2 | ((insn >> 5) & 3); +  case ESize16: +    return ((insn >> 21) & 1) << 1 | ((insn >> 6) & 1); +  case ESize32: +    return ((insn >> 21) & 1); +  default: +    assert(0 && "Unspecified element size!"); +    return 0; +  } +} + +// Imm6 = Inst{21-16}, L = Inst{7} +// +// LeftShift == true (A8.6.367 VQSHL, A8.6.387 VSLI): +// case L:imm6 of +//   '0001xxx' => esize = 8; shift_amount = imm6 - 8 +//   '001xxxx' => esize = 16; shift_amount = imm6 - 16 +//   '01xxxxx' => esize = 32; shift_amount = imm6 - 32 +//   '1xxxxxx' => esize = 64; shift_amount = imm6 +// +// LeftShift == false (A8.6.376 VRSHR, A8.6.368 VQSHRN): +// case L:imm6 of +//   '0001xxx' => esize = 8; shift_amount = 16 - imm6 +//   '001xxxx' => esize = 16; shift_amount = 32 - imm6 +//   '01xxxxx' => esize = 32; shift_amount = 64 - imm6 +//   '1xxxxxx' => esize = 64; shift_amount = 64 - imm6 +// +static unsigned decodeNVSAmt(uint32_t insn, bool LeftShift) { +  ElemSize esize = ESizeNA; +  unsigned L = (insn >> 7) & 1; +  unsigned imm6 = (insn >> 16) & 0x3F; +  if (L == 0) { +    if (imm6 >> 3 == 1) +      esize = ESize8; +    else if (imm6 >> 4 == 1) +      esize = ESize16; +    else if (imm6 >> 5 == 1) +      esize = ESize32; +    else +      assert(0 && "Wrong encoding of Inst{7:21-16}!"); +  } else +    esize = ESize64; + +  if (LeftShift) +    return esize == ESize64 ? imm6 : (imm6 - esize); +  else +    return esize == ESize64 ? 
(esize - imm6) : (2*esize - imm6);
+}
+
+// A8.6.305 VEXT
+// Imm4 = Inst{11-8}
+static unsigned decodeN3VImm(uint32_t insn) {
+  return (insn >> 8) & 0xF;
+}
+
+// VLD*
+//   D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm]
+// VLD*LN*
+//   D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] TIED_TO ... imm(idx)
+// VST*
+//   Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ...
+// VST*LN*
+//   Rn [TIED_TO Rn] align [Rm] D[d] D[d2] ... [imm(idx)]
+//
+// Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it.
+static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced) {
+
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+  // At least one DPR register plus addressing mode #6.
+  assert(NumOps >= 3 && "Expect >= 3 operands");
+
+  unsigned &OpIdx = NumOpsAdded;
+
+  OpIdx = 0;
+
+  // We have homogeneous NEON registers for Load/Store.
+  unsigned RegClass = 0;
+
+  // Double-spaced registers have increments of 2.
+  unsigned Inc = DblSpaced ? 2 : 1;
+
+  unsigned Rn = decodeRn(insn);
+  unsigned Rm = decodeRm(insn);
+  unsigned Rd = decodeNEONRd(insn);
+
+  // A7.7.1 Advanced SIMD addressing mode.
+  bool WB = Rm != 15;
+
+  // LLVM Addressing Mode #6.
+  unsigned RmEnum = 0;
+  if (WB && Rm != 13)
+    RmEnum = getRegisterEnum(ARM::GPRRegClassID, Rm);
+
+  if (Store) {
+    // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's,
+    // then possible lane index.
+    assert(OpIdx < NumOps && OpInfo[0].RegClass == ARM::GPRRegClassID &&
+           "Reg operand expected");
+
+    if (WB) {
+      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                         Rn)));
+      ++OpIdx;
+    }
+
+    assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
+           OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                       Rn)));
+    MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
+    OpIdx += 2;
+
+    if (WB) {
+      MI.addOperand(MCOperand::CreateReg(RmEnum));
+      ++OpIdx;
+    }
+
+    assert(OpIdx < NumOps &&
+           (OpInfo[OpIdx].RegClass == ARM::DPRRegClassID ||
+            OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) &&
+           "Reg operand expected");
+
+    RegClass = OpInfo[OpIdx].RegClass;
+    while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+      if (Opcode >= ARM::VST1q16 && Opcode <= ARM::VST1q8)
+        MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true)));
+      else
+        MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd)));
+      Rd += Inc;
+      ++OpIdx;
+    }
+
+    // Handle possible lane index.
+    if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+        && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
+      MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
+      ++OpIdx;
+    }
+
+  } else {
+    // Consume the DPR/QPR's, possible WB, AddrMode6, possible increment reg,
+    // possible TIED_TO DPR/QPR's (ignored), then possible lane index.
+    RegClass = OpInfo[0].RegClass; + +    while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { +      if (Opcode >= ARM::VLD1q16 && Opcode <= ARM::VLD1q8) +        MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); +      else +        MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); +      Rd += Inc; +      ++OpIdx; +    } + +    if (WB) { +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                         Rn))); +      ++OpIdx; +    } + +    assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && +           OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       Rn))); +    MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? +    OpIdx += 2; + +    if (WB) { +      MI.addOperand(MCOperand::CreateReg(RmEnum)); +      ++OpIdx; +    } + +    while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { +      assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 && +             "Tied to operand expected"); +      MI.addOperand(MCOperand::CreateReg(0)); +      ++OpIdx; +    } + +    // Handle possible lane index. +    if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 +        && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { +      MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); +      ++OpIdx; +    } +  } + +  return true; +} + +// A7.7 +// If L (Inst{21}) == 0, store instructions. +// Find out about double-spaced-ness of the Opcode and pass it on to +// DisassembleNLdSt0(). +static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const StringRef Name = ARMInsts[Opcode].Name; +  bool DblSpaced = false; + +  if (Name.find("LN") != std::string::npos) { +    // To one lane instructions. +    // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane). + +    // <size> == 16 && Inst{5} == 1 --> DblSpaced = true +    if (Name.endswith("16") || Name.endswith("16_UPD")) +      DblSpaced = slice(insn, 5, 5) == 1; + +    // <size> == 32 && Inst{6} == 1 --> DblSpaced = true +    if (Name.endswith("32") || Name.endswith("32_UPD")) +      DblSpaced = slice(insn, 6, 6) == 1; + +  } else { +    // Multiple n-element structures with type encoded as Inst{11-8}. +    // See, for example, A8.6.316 VLD4 (multiple 4-element structures). 
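+    // "Double-spaced" means the element registers are allocated two apart,
+    // e.g. a double-spaced VLD2 with Dd = D0 uses {D0, D2} instead of {D0, D1}.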
+
+    // n == 2 && type == 0b1001 -> DblSpaced = true
+    if (Name.startswith("VST2") || Name.startswith("VLD2"))
+      DblSpaced = slice(insn, 11, 8) == 9;
+
+    // n == 3 && type == 0b0101 -> DblSpaced = true
+    if (Name.startswith("VST3") || Name.startswith("VLD3"))
+      DblSpaced = slice(insn, 11, 8) == 5;
+
+    // n == 4 && type == 0b0001 -> DblSpaced = true
+    if (Name.startswith("VST4") || Name.startswith("VLD4"))
+      DblSpaced = slice(insn, 11, 8) == 1;
+
+  }
+  return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded,
+                           slice(insn, 21, 21) == 0, DblSpaced);
+}
+
+// VMOV (immediate)
+//   Qd/Dd imm
+static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
+    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+  assert(NumOps >= 2 &&
+         (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+          OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+         (OpInfo[1].RegClass == 0) &&
+         "Expect 1 reg operand followed by 1 imm operand");
+
+  // Qd/Dd = Inst{22:15-12} => NEON Rd
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[0].RegClass,
+                                                     decodeNEONRd(insn))));
+
+  ElemSize esize = ESizeNA;
+  switch (Opcode) {
+  case ARM::VMOVv8i8:
+  case ARM::VMOVv16i8:
+    esize = ESize8;
+    break;
+  case ARM::VMOVv4i16:
+  case ARM::VMOVv8i16:
+    esize = ESize16;
+    break;
+  case ARM::VMOVv2i32:
+  case ARM::VMOVv4i32:
+    esize = ESize32;
+    break;
+  case ARM::VMOVv1i64:
+  case ARM::VMOVv2i64:
+    esize = ESize64;
+    break;
+  default:
+    assert(0 && "Unreachable code!");
+    return false;
+  }
+
+  // One register and a modified immediate value.
+  // Add the imm operand.
+  MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize)));
+
+  NumOpsAdded = 2;
+  return true;
+}
+
+namespace {
+enum N2VFlag {
+  N2V_None,
+  N2V_VectorDupLane,
+  N2V_VectorConvert_Between_Float_Fixed
+};
+} // End of unnamed namespace
+
+// Vector Convert [between floating-point and fixed-point]
+//   Qd/Dd Qm/Dm [fbits]
+//
+// Vector Duplicate Lane (from scalar to all elements) Instructions.
+// VDUPLN16d, VDUPLN16q, VDUPLN32d, VDUPLN32q, VDUPLN8d, VDUPLN8q:
+//   Qd/Dd Dm index
+//
+// Vector Move Long:
+//   Qd Dm
+//
+// Vector Move Narrow:
+//   Dd Qm
+//
+// Others
+static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag = N2V_None) {
+
+  const TargetInstrDesc &TID = ARMInsts[Opc];
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+  assert(NumOps >= 2 &&
+         (OpInfo[0].RegClass == ARM::DPRRegClassID ||
+          OpInfo[0].RegClass == ARM::QPRRegClassID) &&
+         (OpInfo[1].RegClass == ARM::DPRRegClassID ||
+          OpInfo[1].RegClass == ARM::QPRRegClassID) &&
+         "Expect >= 2 operands and first 2 as reg operands");
+
+  unsigned &OpIdx = NumOpsAdded;
+
+  OpIdx = 0;
+
+  ElemSize esize = ESizeNA;
+  if (Flag == N2V_VectorDupLane) {
+    // VDUPLN has its index embedded.  Its size can be inferred from the Opcode.
+    assert(Opc >= ARM::VDUPLN16d && Opc <= ARM::VDUPLN8q &&
+           "Unexpected Opcode");
+    esize = (Opc == ARM::VDUPLN8d || Opc == ARM::VDUPLN8q) ? ESize8
+       : ((Opc == ARM::VDUPLN16d || Opc == ARM::VDUPLN16q) ?
ESize16 +                                                           : ESize32); +  } + +  // Qd/Dd = Inst{22:15-12} => NEON Rd +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, +                                                     decodeNEONRd(insn)))); +  ++OpIdx; + +  // VPADAL... +  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { +    // TIED_TO operand. +    MI.addOperand(MCOperand::CreateReg(0)); +    ++OpIdx; +  } + +  // Dm = Inst{5:3-0} => NEON Rm +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, +                                                     decodeNEONRm(insn)))); +  ++OpIdx; + +  // VZIP and others have two TIED_TO reg operands. +  int Idx; +  while (OpIdx < NumOps && +         (Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { +    // Add TIED_TO operand. +    MI.addOperand(MI.getOperand(Idx)); +    ++OpIdx; +  } + +  // Add the imm operand, if required. +  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 +      && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { + +    unsigned imm = 0xFFFFFFFF; + +    if (Flag == N2V_VectorDupLane) +      imm = decodeNVLaneDupIndex(insn, esize); +    if (Flag == N2V_VectorConvert_Between_Float_Fixed) +      imm = decodeVCVTFractionBits(insn); + +    assert(imm != 0xFFFFFFFF && "Internal error"); +    MI.addOperand(MCOperand::CreateImm(imm)); +    ++OpIdx; +  } + +  return true; +} + +static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded); +} +static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, +                                N2V_VectorConvert_Between_Float_Fixed); +} +static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, +                                N2V_VectorDupLane); +} + +// Vector Shift [Accumulate] Instructions. +// Qd/Dd [Qd/Dd (TIED_TO)] Qm/Dm ShiftAmt +// +// Vector Shift Left Long (with maximum shift count) Instructions. +// VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size) +// +static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; + +  assert(NumOps >= 3 && +         (OpInfo[0].RegClass == ARM::DPRRegClassID || +          OpInfo[0].RegClass == ARM::QPRRegClassID) && +         (OpInfo[1].RegClass == ARM::DPRRegClassID || +          OpInfo[1].RegClass == ARM::QPRRegClassID) && +         "Expect >= 3 operands and first 2 as reg operands"); + +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  // Qd/Dd = Inst{22:15-12} => NEON Rd +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, +                                                     decodeNEONRd(insn)))); +  ++OpIdx; + +  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { +    // TIED_TO operand. 
+    MI.addOperand(MCOperand::CreateReg(0)); +    ++OpIdx; +  } + +  assert((OpInfo[OpIdx].RegClass == ARM::DPRRegClassID || +          OpInfo[OpIdx].RegClass == ARM::QPRRegClassID) && +         "Reg operand expected"); + +  // Qm/Dm = Inst{5:3-0} => NEON Rm +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, +                                                     decodeNEONRm(insn)))); +  ++OpIdx; + +  assert(OpInfo[OpIdx].RegClass == 0 && "Imm operand expected"); + +  // Add the imm operand. +   +  // VSHLL has maximum shift count as the imm, inferred from its size. +  unsigned Imm; +  switch (Opcode) { +  default: +    Imm = decodeNVSAmt(insn, LeftShift); +    break; +  case ARM::VSHLLi8: +    Imm = 8; +    break; +  case ARM::VSHLLi16: +    Imm = 16; +    break; +  case ARM::VSHLLi32: +    Imm = 32; +    break; +  } +  MI.addOperand(MCOperand::CreateImm(Imm)); +  ++OpIdx; + +  return true; +} + +// Left shift instructions. +static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true); +} +// Right shift instructions have different shift amount interpretation. +static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false); +} + +namespace { +enum N3VFlag { +  N3V_None, +  N3V_VectorExtract, +  N3V_VectorShift, +  N3V_Multiply_By_Scalar +}; +} // End of unnamed namespace + +// NEON Three Register Instructions with Optional Immediate Operand +// +// Vector Extract Instructions. +// Qd/Dd Qn/Dn Qm/Dm imm4 +// +// Vector Shift (Register) Instructions. +// Qd/Dd Qm/Dm Qn/Dn (notice the order of m, n) +// +// Vector Multiply [Accumulate/Subtract] [Long] By Scalar Instructions. +// Qd/Dd Qn/Dn RestrictedDm index +// +// Others +static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag = N3V_None) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; + +  // No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs. +  assert(NumOps >= 3 && +         (OpInfo[0].RegClass == ARM::DPRRegClassID || +          OpInfo[0].RegClass == ARM::QPRRegClassID) && +         (OpInfo[1].RegClass == ARM::DPRRegClassID || +          OpInfo[1].RegClass == ARM::QPRRegClassID) && +         "Expect >= 3 operands and first 2 as reg operands"); + +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  bool VdVnVm = Flag == N3V_VectorShift ? false : true; +  bool IsImm4 = Flag == N3V_VectorExtract ? true : false; +  bool IsDmRestricted = Flag == N3V_Multiply_By_Scalar ? true : false; +  ElemSize esize = ESizeNA; +  if (Flag == N3V_Multiply_By_Scalar) { +    unsigned size = (insn >> 20) & 3; +    if (size == 1) esize = ESize16; +    if (size == 2) esize = ESize32; +    assert (esize == ESize16 || esize == ESize32); +  } + +  // Qd/Dd = Inst{22:15-12} => NEON Rd +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, +                                                     decodeNEONRd(insn)))); +  ++OpIdx; + +  // VABA, VABAL, VBSLd, VBSLq, ... +  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) { +    // TIED_TO operand. 
+    MI.addOperand(MCOperand::CreateReg(0)); +    ++OpIdx; +  } + +  // Dn = Inst{7:19-16} => NEON Rn +  // or +  // Dm = Inst{5:3-0} => NEON Rm +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(OpInfo[OpIdx].RegClass, +                                  VdVnVm ? decodeNEONRn(insn) +                                         : decodeNEONRm(insn)))); +  ++OpIdx; + +  // Special case handling for VMOVDneon and VMOVQ because they are marked as +  // N3RegFrm. +  if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ) +    return true; +   +  // Dm = Inst{5:3-0} => NEON Rm +  // or +  // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise +  // or +  // Dn = Inst{7:19-16} => NEON Rn +  unsigned m = VdVnVm ? (IsDmRestricted ? decodeRestrictedDm(insn, esize) +                                        : decodeNEONRm(insn)) +                      : decodeNEONRn(insn); + +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(OpInfo[OpIdx].RegClass, m))); +  ++OpIdx; + +  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 +      && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { +    // Add the imm operand. +    unsigned Imm = 0; +    if (IsImm4) +      Imm = decodeN3VImm(insn); +    else if (IsDmRestricted) +      Imm = decodeRestrictedDmIndex(insn, esize); +    else { +      assert(0 && "Internal error: unreachable code!"); +      return false; +    } + +    MI.addOperand(MCOperand::CreateImm(Imm)); +    ++OpIdx; +  } + +  return true; +} + +static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded); +} +static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, +                                  N3V_VectorShift); +} +static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, +                                  N3V_VectorExtract); +} +static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, +                                  N3V_Multiply_By_Scalar); +} + +// Vector Table Lookup +// +// VTBL1, VTBX1: Dd [Dd(TIED_TO)] Dn Dm +// VTBL2, VTBX2: Dd [Dd(TIED_TO)] Dn Dn+1 Dm +// VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm +// VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm +static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; + +  assert(NumOps >= 3 && +         OpInfo[0].RegClass == ARM::DPRRegClassID && +         OpInfo[1].RegClass == ARM::DPRRegClassID && +         OpInfo[2].RegClass == ARM::DPRRegClassID && +         "Expect >= 3 operands and first 3 as reg operands"); + +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  unsigned Rn = decodeNEONRn(insn); + +  // {Dn} encoded as len = 0b00 +  // {Dn Dn+1} encoded as len = 0b01 +  // {Dn Dn+1 Dn+2 } encoded as len = 0b10 +  // {Dn Dn+1 Dn+2 Dn+3} encoded as len = 0b11 +  unsigned Len = slice(insn, 
9, 8) + 1;
+
+  // Dd (the destination vector)
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+                                                     decodeNEONRd(insn))));
+  ++OpIdx;
+
+  // Process tied_to operand constraint.
+  int Idx;
+  if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+    MI.addOperand(MI.getOperand(Idx));
+    ++OpIdx;
+  }
+
+  // Do the <list> now.
+  for (unsigned i = 0; i < Len; ++i) {
+    assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+           "Reg operand expected");
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+                                                       Rn + i)));
+    ++OpIdx;
+  }
+
+  // Dm (the index vector)
+  assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID &&
+         "Reg operand (index vector) expected");
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+                                                     decodeNEONRm(insn))));
+  ++OpIdx;
+
+  return true;
+}
+
+static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+  assert(0 && "Unreachable code!");
+  return false;
+}
+
+// Vector Get Lane (move scalar to ARM core register) Instructions.
+// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
+static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  unsigned short NumDefs = TID.getNumDefs();
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+  assert(NumDefs == 1 && NumOps >= 3 &&
+         OpInfo[0].RegClass == ARM::GPRRegClassID &&
+         OpInfo[1].RegClass == ARM::DPRRegClassID &&
+         OpInfo[2].RegClass == 0 &&
+         "Expect >= 3 operands with one dst operand");
+
+  ElemSize esize =
+    Opcode == ARM::VGETLNi32 ? ESize32
+      : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? ESize16
+                                                                : ESize8);
+
+  // Rt = Inst{15-12} => ARM Rd
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                     decodeRd(insn))));
+
+  // Dn = Inst{7:19-16} => NEON Rn
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID,
+                                                     decodeNEONRn(insn))));
+
+  MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize)));
+
+  NumOpsAdded = 3;
+  return true;
+}
+
+// Vector Set Lane (move ARM core register to scalar) Instructions.
+// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
+static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO) {
+
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  unsigned short NumDefs = TID.getNumDefs();
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
+
+  assert(NumDefs == 1 && NumOps >= 3 &&
+         OpInfo[0].RegClass == ARM::DPRRegClassID &&
+         OpInfo[1].RegClass == ARM::DPRRegClassID &&
+         TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+         OpInfo[2].RegClass == ARM::GPRRegClassID &&
+         OpInfo[3].RegClass == 0 &&
+         "Expect >= 3 operands with one dst operand");
+
+  ElemSize esize =
+    Opcode == ARM::VSETLNi8 ? ESize8
+                            : (Opcode == ARM::VSETLNi16 ?
ESize16 +                                                        : ESize32); + +  // Dd = Inst{7:19-16} => NEON Rn +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, +                                                     decodeNEONRn(insn)))); + +  // TIED_TO operand. +  MI.addOperand(MCOperand::CreateReg(0)); + +  // Rt = Inst{15-12} => ARM Rd +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); + +  MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); + +  NumOpsAdded = 4; +  return true; +} + +// Vector Duplicate Instructions (from ARM core register to all elements). +// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt +static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 2 && +         (OpInfo[0].RegClass == ARM::DPRRegClassID || +          OpInfo[0].RegClass == ARM::QPRRegClassID) && +         OpInfo[1].RegClass == ARM::GPRRegClassID && +         "Expect >= 2 operands and first 2 as reg operand"); + +  unsigned RegClass = OpInfo[0].RegClass; + +  // Qd/Dd = Inst{7:19-16} => NEON Rn +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass, +                                                     decodeNEONRn(insn)))); + +  // Rt = Inst{15-12} => ARM Rd +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); + +  NumOpsAdded = 2; +  return true; +} + +// A8.6.41 DMB +// A8.6.42 DSB +// A8.6.49 ISB +static inline bool MemBarrierInstr(uint32_t insn) { +  unsigned op7_4 = slice(insn, 7, 4); +  if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6)) +    return true; + +  return false; +} + +static inline bool PreLoadOpcode(unsigned Opcode) { +  switch(Opcode) { +  case ARM::PLDi:  case ARM::PLDr: +  case ARM::PLDWi: case ARM::PLDWr: +  case ARM::PLIi:  case ARM::PLIr: +    return true; +  default: +    return false; +  } +} + +static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  // Preload Data/Instruction requires either 2 or 4 operands. +  // PLDi, PLDWi, PLIi:                Rn [+/-]imm12 add = (U == '1') +  // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); + +  if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) { +    unsigned Imm12 = slice(insn, 11, 0); +    bool Negative = getUBit(insn) == 0; +    int Offset = Negative ? -1 - Imm12 : 1 * Imm12; +    MI.addOperand(MCOperand::CreateImm(Offset)); +    NumOpsAdded = 2; +  } else { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRm(insn)))); + +    ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + +    // Inst{6-5} encodes the shift opcode. +    ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); +    // Inst{11-7} encodes the imm5 shift amount. +    unsigned ShImm = slice(insn, 11, 7); + +    // A8.4.1.  Possible rrx or shift amount of 32... 
+    getImmShiftSE(ShOp, ShImm); +    MI.addOperand(MCOperand::CreateImm( +                    ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); +    NumOpsAdded = 3; +  } + +  return true; +} + +static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  if (MemBarrierInstr(insn)) +    return true; + +  switch (Opcode) { +  case ARM::CLREX: +  case ARM::NOP: +  case ARM::TRAP: +  case ARM::YIELD: +  case ARM::WFE: +  case ARM::WFI: +  case ARM::SEV: +  case ARM::SETENDBE: +  case ARM::SETENDLE: +    return true; +  default: +    break; +  } + +  // CPS has a singleton $opt operand that contains the following information: +  // opt{4-0} = mode from Inst{4-0} +  // opt{5} = changemode from Inst{17} +  // opt{8-6} = AIF from Inst{8-6} +  // opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +  if (Opcode == ARM::CPS) { +    unsigned Option = slice(insn, 4, 0) | slice(insn, 17, 17) << 5 | +      slice(insn, 8, 6) << 6 | slice(insn, 19, 18) << 9; +    MI.addOperand(MCOperand::CreateImm(Option)); +    NumOpsAdded = 1; +    return true; +  } + +  // DBG has its option specified in Inst{3-0}. +  if (Opcode == ARM::DBG) { +    MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); +    NumOpsAdded = 1; +    return true; +  } + +  // BKPT takes an imm32 val equal to ZeroExtend(Inst{19-8:3-0}). +  if (Opcode == ARM::BKPT) { +    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 8) << 4 | +                                       slice(insn, 3, 0))); +    NumOpsAdded = 1; +    return true; +  } + +  if (PreLoadOpcode(Opcode)) +    return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded); + +  assert(0 && "Unexpected misc instruction!"); +  return false; +} + +static bool DisassembleThumbMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO) { + +  assert(0 && "Unexpected thumb misc. instruction!"); +  return false; +} + +/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP. +/// We divide the disassembly task into different categories, with each one +/// corresponding to a specific instruction encoding format.  There could be +/// exceptions when handling a specific format, and that is why the Opcode is +/// also present in the function prototype. +static const DisassembleFP FuncPtrs[] = { +  &DisassemblePseudo, +  &DisassembleMulFrm, +  &DisassembleBrFrm, +  &DisassembleBrMiscFrm, +  &DisassembleDPFrm, +  &DisassembleDPSoRegFrm, +  &DisassembleLdFrm, +  &DisassembleStFrm, +  &DisassembleLdMiscFrm, +  &DisassembleStMiscFrm, +  &DisassembleLdStMulFrm, +  &DisassembleLdStExFrm, +  &DisassembleArithMiscFrm, +  &DisassembleExtFrm, +  &DisassembleVFPUnaryFrm, +  &DisassembleVFPBinaryFrm, +  &DisassembleVFPConv1Frm, +  &DisassembleVFPConv2Frm, +  &DisassembleVFPConv3Frm, +  &DisassembleVFPConv4Frm, +  &DisassembleVFPConv5Frm, +  &DisassembleVFPLdStFrm, +  &DisassembleVFPLdStMulFrm, +  &DisassembleVFPMiscFrm, +  &DisassembleThumbFrm, +  &DisassembleNEONFrm, +  &DisassembleNEONGetLnFrm, +  &DisassembleNEONSetLnFrm, +  &DisassembleNEONDupFrm, +  &DisassembleMiscFrm, +  &DisassembleThumbMiscFrm, + +  // VLD and VST (including one lane) Instructions. +  &DisassembleNLdSt, + +  // A7.4.6 One register and a modified immediate value +  // 1-Register Instructions with imm. +  // LLVM only defines VMOVv instructions. +  &DisassembleN1RegModImmFrm, + +  // 2-Register Instructions with no imm. 
+  &DisassembleN2RegFrm,
+
+  // 2-Register Instructions with imm (vector convert float/fixed point).
+  &DisassembleNVCVTFrm,
+
+  // 2-Register Instructions with imm (vector dup lane).
+  &DisassembleNVecDupLnFrm,
+
+  // Vector Shift Left Instructions.
+  &DisassembleN2RegVecShLFrm,
+
+  // Vector Shift Right Instructions, which have a different interpretation of
+  // the shift amount from the imm6 field.
+  &DisassembleN2RegVecShRFrm,
+
+  // 3-Register Data-Processing Instructions.
+  &DisassembleN3RegFrm,
+
+  // Vector Shift (Register) Instructions.
+  // D:Vd M:Vm N:Vn (notice that M:Vm is the first operand)
+  &DisassembleN3RegVecShFrm,
+
+  // Vector Extract Instructions.
+  &DisassembleNVecExtractFrm,
+
+  // Vector [Saturating Rounding Doubling] Multiply [Accumulate/Subtract] [Long]
+  // By Scalar Instructions.
+  &DisassembleNVecMulScalarFrm,
+
+  // Vector Table Lookup uses byte indexes in a control vector to look up byte
+  // values in a table and generate a new vector.
+  &DisassembleNVTBLFrm,
+
+  NULL
+};
+
+/// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+/// The general idea is to set the Opcode for the MCInst, followed by adding
+/// the appropriate MCOperands to the MCInst.  ARM Basic MC Builder delegates
+/// to the Format-specific disassemble function for disassembly, followed by
+/// TryPredicateAndSBitModifier() to do PredicateOperand and OptionalDefOperand
+/// which follow the Dst/Src Operands.
+bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) {
+  // Stage 1 sets the Opcode.
+  MI.setOpcode(Opcode);
+  // If the number of operands is zero, we're done!
+  if (NumOps == 0)
+    return true;
+
+  // Stage 2 calls the format-specific disassemble function to build the operand
+  // list.
+  if (Disasm == NULL)
+    return false;
+  unsigned NumOpsAdded = 0;
+  bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this);
+
+  if (!OK) return false;
+  if (NumOpsAdded >= NumOps)
+    return true;
+
+  // Stage 3 deals with operands unaccounted for after stage 2 is finished.
+  // FIXME: Should this be done selectively?
+  return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded);
+}
+
+bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+    uint32_t insn, unsigned short NumOpsRemaining) {
+
+  assert(NumOpsRemaining > 0 && "Invalid argument");
+
+  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  const std::string &Name = ARMInsts[Opcode].Name;
+  unsigned Idx = MI.getNumOperands();
+
+  // First, we check whether this instr specifies the PredicateOperand through
+  // a pair of TargetOperandInfos with isPredicate() property.
+  if (NumOpsRemaining >= 2 &&
+      OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
+      OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+  {
+    // If we are inside an IT block, get the IT condition bits maintained via
+    // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
+    // See also A2.5.2.
+    if (InITBlock())
+      MI.addOperand(MCOperand::CreateImm(GetITCond()));
+    else {
+      if (Name.length() > 1 && Name[0] == 't') {
+        // Thumb conditional branch instructions have their cond field embedded,
+        // like ARM.
+        // +        // A8.6.16 B +        if (Name == "t2Bcc") +          MI.addOperand(MCOperand::CreateImm(slice(insn, 25, 22))); +        else if (Name == "tBcc") +          MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); +        else +          MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); +      } else { +        // ARM Instructions.  Check condition field. +        int64_t CondVal = getCondField(insn); +        if (CondVal == 0xF) +          MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); +        else +          MI.addOperand(MCOperand::CreateImm(CondVal)); +      } +    } +    MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); +    Idx += 2; +    NumOpsRemaining -= 2; +    if (NumOpsRemaining == 0) +      return true; +  } + +  // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set. +  if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) { +    MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0)); +    --NumOpsRemaining; +  } + +  if (NumOpsRemaining == 0) +    return true; +  else +    return false; +} + +/// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary +/// after BuildIt is finished. +bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, +    uint32_t insn) { + +  if (!SP) return Status; + +  if (Opcode == ARM::t2IT) +    SP->InitIT(slice(insn, 7, 0)); +  else if (InITBlock()) +    SP->UpdateIT(); + +  return Status; +} + +/// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. +ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, +                                     unsigned short num) +  : Opcode(opc), Format(format), NumOps(num), SP(0) { +  unsigned Idx = (unsigned)format; +  assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format"); +  Disasm = FuncPtrs[Idx]; +} + +/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC +/// infrastructure of an MCInst given the Opcode and Format of the instr. +/// Return NULL if it fails to create/return a proper builder.  API clients +/// are responsible for freeing up of the allocated memory.  Cacheing can be +/// performed by the API clients to improve performance. +ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { + +  return new ARMBasicMCBuilder(Opcode, Format, +                               ARMInsts[Opcode].getNumOperands()); +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h new file mode 100644 index 000000000000..307523037089 --- /dev/null +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -0,0 +1,248 @@ +//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the ARM Disassembler. +// +// The first part defines the enumeration type of ARM instruction format, which +// specifies the encoding used by the instruction, as well as a helper function +// to convert the enums to printable char strings. +// +// It also contains code to represent the concepts of Builder and DisassembleFP +// to solve the problem of disassembling an ARM instr. 
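+//
+// A typical client-side sequence looks roughly like this (illustrative only;
+// error handling and Session setup via setSession() are omitted):
+//
+//   ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format);
+//   MCInst MI;
+//   if (Builder && Builder->Build(MI, insn)) {
+//     // MI now carries the opcode and its MCOperands.
+//   }
+//   delete Builder;  // Clients own the builder returned by CreateMCBuilder().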
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMDISASSEMBLERCORE_H +#define ARMDISASSEMBLERCORE_H + +#include "llvm/MC/MCInst.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "ARMInstrInfo.h" +#include "ARMDisassembler.h" + +namespace llvm { + +class ARMUtils { +public: +  static const char *OpcodeName(unsigned Opcode); +}; + +///////////////////////////////////////////////////// +//                                                 // +//  Enums and Utilities for ARM Instruction Format // +//                                                 // +///////////////////////////////////////////////////// + +#define ARM_FORMATS                   \ +  ENTRY(ARM_FORMAT_PSEUDO,         0) \ +  ENTRY(ARM_FORMAT_MULFRM,         1) \ +  ENTRY(ARM_FORMAT_BRFRM,          2) \ +  ENTRY(ARM_FORMAT_BRMISCFRM,      3) \ +  ENTRY(ARM_FORMAT_DPFRM,          4) \ +  ENTRY(ARM_FORMAT_DPSOREGFRM,     5) \ +  ENTRY(ARM_FORMAT_LDFRM,          6) \ +  ENTRY(ARM_FORMAT_STFRM,          7) \ +  ENTRY(ARM_FORMAT_LDMISCFRM,      8) \ +  ENTRY(ARM_FORMAT_STMISCFRM,      9) \ +  ENTRY(ARM_FORMAT_LDSTMULFRM,    10) \ +  ENTRY(ARM_FORMAT_LDSTEXFRM,     11) \ +  ENTRY(ARM_FORMAT_ARITHMISCFRM,  12) \ +  ENTRY(ARM_FORMAT_EXTFRM,        13) \ +  ENTRY(ARM_FORMAT_VFPUNARYFRM,   14) \ +  ENTRY(ARM_FORMAT_VFPBINARYFRM,  15) \ +  ENTRY(ARM_FORMAT_VFPCONV1FRM,   16) \ +  ENTRY(ARM_FORMAT_VFPCONV2FRM,   17) \ +  ENTRY(ARM_FORMAT_VFPCONV3FRM,   18) \ +  ENTRY(ARM_FORMAT_VFPCONV4FRM,   19) \ +  ENTRY(ARM_FORMAT_VFPCONV5FRM,   20) \ +  ENTRY(ARM_FORMAT_VFPLDSTFRM,    21) \ +  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \ +  ENTRY(ARM_FORMAT_VFPMISCFRM,    23) \ +  ENTRY(ARM_FORMAT_THUMBFRM,      24) \ +  ENTRY(ARM_FORMAT_NEONFRM,       25) \ +  ENTRY(ARM_FORMAT_NEONGETLNFRM,  26) \ +  ENTRY(ARM_FORMAT_NEONSETLNFRM,  27) \ +  ENTRY(ARM_FORMAT_NEONDUPFRM,    28) \ +  ENTRY(ARM_FORMAT_MISCFRM,       29) \ +  ENTRY(ARM_FORMAT_THUMBMISCFRM,  30) \ +  ENTRY(ARM_FORMAT_NLdSt,         31) \ +  ENTRY(ARM_FORMAT_N1RegModImm,   32) \ +  ENTRY(ARM_FORMAT_N2Reg,         33) \ +  ENTRY(ARM_FORMAT_NVCVT,         34) \ +  ENTRY(ARM_FORMAT_NVecDupLn,     35) \ +  ENTRY(ARM_FORMAT_N2RegVecShL,   36) \ +  ENTRY(ARM_FORMAT_N2RegVecShR,   37) \ +  ENTRY(ARM_FORMAT_N3Reg,         38) \ +  ENTRY(ARM_FORMAT_N3RegVecSh,    39) \ +  ENTRY(ARM_FORMAT_NVecExtract,   40) \ +  ENTRY(ARM_FORMAT_NVecMulScalar, 41) \ +  ENTRY(ARM_FORMAT_NVTBL,         42) + +// ARM instruction format specifies the encoding used by the instruction. +#define ENTRY(n, v) n = v, +typedef enum { +  ARM_FORMATS +  ARM_FORMAT_NA +} ARMFormat; +#undef ENTRY + +// Converts enum to const char*. +static const inline char *stringForARMFormat(ARMFormat form) { +#define ENTRY(n, v) case n: return #n; +  switch(form) { +    ARM_FORMATS +  case ARM_FORMAT_NA: +  default: +    return ""; +  } +#undef ENTRY +} + +/// Expands on the enum definitions from ARMBaseInstrInfo.h. +/// They are being used by the disassembler implementation. +namespace ARMII { +  enum { +    NEONRegMask = 15, +    GPRRegMask = 15, +    NEON_RegRdShift = 12, +    NEON_D_BitShift = 22, +    NEON_RegRnShift = 16, +    NEON_N_BitShift = 7, +    NEON_RegRmShift = 0, +    NEON_M_BitShift = 5 +  }; +} + +/// Utility function for extracting [From, To] bits from a uint32_t. 
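+/// For example, for the ARM word 0xE92D4800 (stmdb sp!, {r11, lr}),
+/// slice(0xE92D4800, 27, 25) == 0x4 and slice(0xE92D4800, 19, 16) == 0xD (SP).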
+static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
+  assert(From < 32 && To < 32 && From >= To);
+  return (Bits >> To) & ((1 << (From - To + 1)) - 1);
+}
+
+/// Utility function for setting [From, To] bits to Val for a uint32_t.
+static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
+                            uint32_t Val) {
+  assert(From < 32 && To < 32 && From >= To);
+  uint32_t Mask = ((1 << (From - To + 1)) - 1);
+  Bits &= ~(Mask << To);
+  Bits |= (Val & Mask) << To;
+}
+
+/// Various utilities for checking the target specific flags.
+
+/// A unary data processing instruction doesn't have an Rn operand.
+static inline bool isUnaryDP(unsigned TSFlags) {
+  return (TSFlags & ARMII::UnaryDP);
+}
+
+/// This four-bit field describes the addressing mode used.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getAddrMode(unsigned TSFlags) {
+  return (TSFlags & ARMII::AddrModeMask);
+}
+
+/// {IndexModePre, IndexModePost}
+/// Only valid for load and store ops.
+/// See also ARMBaseInstrInfo.h.
+static inline unsigned getIndexMode(unsigned TSFlags) {
+  return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+}
+
+/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
+static inline bool isPrePostLdSt(unsigned TSFlags) {
+  return (TSFlags & ARMII::IndexModeMask) != 0;
+}
+
+// Forward declaration.
+class ARMBasicMCBuilder;
+
+// Builder Object is mostly ignored except in some Thumb disassemble functions.
+typedef ARMBasicMCBuilder *BO;
+
+/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
+/// and builds the MCOperand list upon disassembly.  It returns false on failure
+/// or true on success.  The number of operands added is updated upon success.
+typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);
+
+/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
+/// knows how to build up the MCOperand list.
+class ARMBasicMCBuilder {
+  unsigned Opcode;
+  ARMFormat Format;
+  unsigned short NumOps;
+  DisassembleFP Disasm;
+  Session *SP;
+
+public:
+  ARMBasicMCBuilder(ARMBasicMCBuilder &B)
+    : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
+      SP(B.SP)
+  {}
+
+  /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
+  ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);
+
+  virtual ~ARMBasicMCBuilder() {}
+
+  void setSession(Session *sp) {
+    SP = sp;
+  }
+
+  /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
+  /// the possible Predicate and SBitModifier, to build the remaining MCOperand
+  /// constituents.
+  bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
+      uint32_t insn, unsigned short NumOpsRemaining);
+
+  /// InITBlock - InITBlock returns true if we are inside an IT block.
+  bool InITBlock() {
+    if (SP)
+      return SP->ITCounter > 0;
+
+    return false;
+  }
+
+  /// Build - Build delegates to BuildIt to perform the heavy lifting.  After
+  /// that, it invokes RunBuildAfterHook where some housekeeping can be done.
+  virtual bool Build(MCInst &MI, uint32_t insn) {
+    bool Status = BuildIt(MI, insn);
+    return RunBuildAfterHook(Status, MI, insn);
+  }
+
+  /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
+  /// The general idea is to set the Opcode for the MCInst, followed by adding +  /// the appropriate MCOperands to the MCInst.  ARM Basic MC Builder delegates +  /// to the Format-specific disassemble function for disassembly, followed by +  /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand +  /// which follow the Dst/Src Operands. +  virtual bool BuildIt(MCInst &MI, uint32_t insn); + +  /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary +  /// after BuildIt is finished. +  virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn); + +private: +  /// Get condition of the current IT instruction. +  unsigned GetITCond() { +    assert(SP); +    return slice(SP->ITState, 7, 4); +  } +}; + +/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC +/// infrastructure of an MCInst given the Opcode and Format of the instr. +/// Return NULL if it fails to create/return a proper builder.  API clients +/// are responsible for freeing up of the allocated memory.  Cacheing can be +/// performed by the API clients to improve performance. +extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); + +} // namespace llvm + +#endif diff --git a/lib/Target/ARM/Disassembler/Makefile b/lib/Target/ARM/Disassembler/Makefile new file mode 100644 index 000000000000..031b6aca5a48 --- /dev/null +++ b/lib/Target/ARM/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===## +# +#                     The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMDisassembler + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h new file mode 100644 index 000000000000..481f25d6f486 --- /dev/null +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -0,0 +1,2187 @@ +//===- ThumbDisassemblerCore.h - Thumb disassembler helpers -----*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the ARM Disassembler. +// It contains code for disassembling a Thumb instr.  It is to be included by +// ARMDisassemblerCore.cpp because it contains the static DisassembleThumbFrm() +// function which acts as the dispatcher to disassemble a Thumb instruction. +// +//===----------------------------------------------------------------------===// + +/////////////////////////////// +//                           // +//     Utility Functions     // +//                           // +/////////////////////////////// + +// Utilities for 16-bit Thumb instructions. 
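+//
+// As a worked example, the halfword 0x18D1 encodes "adds r1, r2, r3":
+// slice(0x18D1, 8, 6) == 3 (tRm), slice(0x18D1, 5, 3) == 2 (tRn), and
+// slice(0x18D1, 2, 0) == 1 (tRd), matching the helpers defined below.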
+/* +15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 +               [  tRt ] +                      [ tRm ]  [ tRn ]  [ tRd ] +                         D  [   Rm   ]  [  Rd ] + +                      [ imm3] +               [    imm5    ] +                   i     [    imm5   ] +                            [       imm7      ] +                         [       imm8         ] +               [             imm11            ] + +            [   cond  ] +*/ + +// Extract tRt: Inst{10-8}. +static inline unsigned getT1tRt(uint32_t insn) { +  return slice(insn, 10, 8); +} + +// Extract tRm: Inst{8-6}. +static inline unsigned getT1tRm(uint32_t insn) { +  return slice(insn, 8, 6); +} + +// Extract tRn: Inst{5-3}. +static inline unsigned getT1tRn(uint32_t insn) { +  return slice(insn, 5, 3); +} + +// Extract tRd: Inst{2-0}. +static inline unsigned getT1tRd(uint32_t insn) { +  return slice(insn, 2, 0); +} + +// Extract [D:Rd]: Inst{7:2-0}. +static inline unsigned getT1Rd(uint32_t insn) { +  return slice(insn, 7, 7) << 3 | slice(insn, 2, 0); +} + +// Extract Rm: Inst{6-3}. +static inline unsigned getT1Rm(uint32_t insn) { +  return slice(insn, 6, 3); +} + +// Extract imm3: Inst{8-6}. +static inline unsigned getT1Imm3(uint32_t insn) { +  return slice(insn, 8, 6); +} + +// Extract imm5: Inst{10-6}. +static inline unsigned getT1Imm5(uint32_t insn) { +  return slice(insn, 10, 6); +} + +// Extract i:imm5: Inst{9:7-3}. +static inline unsigned getT1Imm6(uint32_t insn) { +  return slice(insn, 9, 9) << 5 | slice(insn, 7, 3); +} + +// Extract imm7: Inst{6-0}. +static inline unsigned getT1Imm7(uint32_t insn) { +  return slice(insn, 6, 0); +} + +// Extract imm8: Inst{7-0}. +static inline unsigned getT1Imm8(uint32_t insn) { +  return slice(insn, 7, 0); +} + +// Extract imm11: Inst{10-0}. +static inline unsigned getT1Imm11(uint32_t insn) { +  return slice(insn, 10, 0); +} + +// Extract cond: Inst{11-8}. +static inline unsigned getT1Cond(uint32_t insn) { +  return slice(insn, 11, 8); +} + +static inline bool IsGPR(unsigned RegClass) { +  return RegClass == ARM::GPRRegClassID; +} + +// Utilities for 32-bit Thumb instructions. + +// Extract imm4: Inst{19-16}. +static inline unsigned getImm4(uint32_t insn) { +  return slice(insn, 19, 16); +} + +// Extract imm3: Inst{14-12}. +static inline unsigned getImm3(uint32_t insn) { +  return slice(insn, 14, 12); +} + +// Extract imm8: Inst{7-0}. +static inline unsigned getImm8(uint32_t insn) { +  return slice(insn, 7, 0); +} + +// A8.6.61 LDRB (immediate, Thumb) and friends +// +/-: Inst{9} +// imm8: Inst{7-0} +static inline int decodeImm8(uint32_t insn) { +  int Offset = getImm8(insn); +  return slice(insn, 9, 9) ? Offset : -Offset; +} + +// Extract imm12: Inst{11-0}. +static inline unsigned getImm12(uint32_t insn) { +  return slice(insn, 11, 0); +} + +// A8.6.63 LDRB (literal) and friends +// +/-: Inst{23} +// imm12: Inst{11-0} +static inline int decodeImm12(uint32_t insn) { +  int Offset = getImm12(insn); +  return slice(insn, 23, 23) ? Offset : -Offset; +} + +// Extract imm2: Inst{7-6}. +static inline unsigned getImm2(uint32_t insn) { +  return slice(insn, 7, 6); +} + +// For BFI, BFC, t2SBFX, and t2UBFX. +// Extract lsb: Inst{14-12:7-6}. +static inline unsigned getLsb(uint32_t insn) { +  return getImm3(insn) << 2 | getImm2(insn); +} + +// For BFI and BFC. +// Extract msb: Inst{4-0}. +static inline unsigned getMsb(uint32_t insn) { +  return slice(insn, 4, 0); +} + +// For t2SBFX and t2UBFX. +// Extract widthminus1: Inst{4-0}. 
+static inline unsigned getWidthMinus1(uint32_t insn) {
+  return slice(insn, 4, 0);
+}
+
+// For t2ADDri12 and t2SUBri12.
+// imm12 = i:imm3:imm8;
+static inline unsigned getIImm3Imm8(uint32_t insn) {
+  return slice(insn, 26, 26) << 11 | getImm3(insn) << 8 | getImm8(insn);
+}
+
+// For t2MOVi16 and t2MOVTi16.
+// imm16 = imm4:i:imm3:imm8;
+static inline unsigned getImm16(uint32_t insn) {
+  return getImm4(insn) << 12 | slice(insn, 26, 26) << 11 |
+    getImm3(insn) << 8 | getImm8(insn);
+}
+
+// Inst{5-4} encodes the shift type.
+static inline unsigned getShiftTypeBits(uint32_t insn) {
+  return slice(insn, 5, 4);
+}
+
+// Inst{14-12}:Inst{7-6} encodes the imm5 shift amount.
+static inline unsigned getShiftAmtBits(uint32_t insn) {
+  return getImm3(insn) << 2 | getImm2(insn);
+}
+
+// A8.6.17 BFC
+// Encoding T1 ARMv6T2, ARMv7
+// LLVM-specific encoding for #<lsb> and #<width>
+static inline uint32_t getBitfieldInvMask(uint32_t insn) {
+  uint32_t lsb = getImm3(insn) << 2 | getImm2(insn);
+  uint32_t msb = getMsb(insn);
+  uint32_t Val = 0;
+  assert(lsb <= msb && "Encoding error: lsb > msb");
+  for (uint32_t i = lsb; i <= msb; ++i)
+    Val |= (1 << i);
+  return ~Val;
+}
+
+// A8.4 Shifts applied to a register
+// A8.4.1 Constant shifts
+// A8.4.3 Pseudocode details of instruction-specified shifts and rotates
+//
+// decodeImmShift() returns the shift amount and the shift opcode.
+// Note that, as of Jan-06-2010, LLVM does not support rrx shifted operands yet.
+static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
+                                      ARM_AM::ShiftOpc &ShOp) {
+
+  assert(imm5 < 32 && "Invalid imm5 argument");
+  switch (bits2) {
+  default: assert(0 && "No such value");
+  case 0:
+    ShOp = ARM_AM::lsl;
+    return imm5;
+  case 1:
+    ShOp = ARM_AM::lsr;
+    return (imm5 == 0 ? 32 : imm5);
+  case 2:
+    ShOp = ARM_AM::asr;
+    return (imm5 == 0 ? 32 : imm5);
+  case 3:
+    ShOp = (imm5 == 0 ? ARM_AM::rrx : ARM_AM::ror);
+    return (imm5 == 0 ? 1 : imm5);
+  }
+}
+
+// A6.3.2 Modified immediate constants in Thumb instructions
+//
+// ThumbExpandImm() returns the modified immediate constant given an imm12 for
+// Thumb data-processing instructions with modified immediate.
+// See also A6.3.1 Data-processing (modified immediate).
+static inline unsigned ThumbExpandImm(unsigned imm12) {
+  assert(imm12 <= 0xFFF && "Invalid imm12 argument");
+
+  // If the leading two bits are 0b00, the modified immediate constant is
+  // obtained by splatting the low 8 bits into the first byte, every other byte,
+  // or every byte of a 32-bit value.
+  //
+  // Otherwise, a rotate right of '1':imm12<6:0> by the amount imm12<11:7> is
+  // performed.
+
+  if (slice(imm12, 11, 10) == 0) {
+    unsigned short control = slice(imm12, 9, 8);
+    unsigned imm8 = slice(imm12, 7, 0);
+    switch (control) {
+    default:
+      assert(0 && "No such value");
+      return 0;
+    case 0:
+      return imm8;
+    case 1:
+      return imm8 << 16 | imm8;
+    case 2:
+      return imm8 << 24 | imm8 << 8;
+    case 3:
+      return imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8;
+    }
+  } else {
+    // A rotate is required.
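+    // For example, imm12 = 0xCAB yields Val = 0xAB and Amt = 25, so the
+    // expanded constant is rotr32(0xAB, 25) == 0x5580.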
+    unsigned Val = 1 << 7 | slice(imm12, 6, 0); +    unsigned Amt = slice(imm12, 11, 7); +    return ARM_AM::rotr32(Val, Amt); +  } +} + +static inline int decodeImm32_B_EncodingT3(uint32_t insn) { +  bool S = slice(insn, 26, 26); +  bool J1 = slice(insn, 13, 13); +  bool J2 = slice(insn, 11, 11); +  unsigned Imm21 = slice(insn, 21, 16) << 12 | slice(insn, 10, 0) << 1; +  if (S) Imm21 |= 1 << 20; +  if (J2) Imm21 |= 1 << 19; +  if (J1) Imm21 |= 1 << 18; + +  return SignExtend32<21>(Imm21); +} + +static inline int decodeImm32_B_EncodingT4(uint32_t insn) { +  unsigned S = slice(insn, 26, 26); +  bool I1 = slice(insn, 13, 13) == S; +  bool I2 = slice(insn, 11, 11) == S; +  unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1; +  if (S) Imm25 |= 1 << 24; +  if (I1) Imm25 |= 1 << 23; +  if (I2) Imm25 |= 1 << 22; + +  return SignExtend32<25>(Imm25); +} + +static inline int decodeImm32_BL(uint32_t insn) { +  unsigned S = slice(insn, 26, 26); +  bool I1 = slice(insn, 13, 13) == S; +  bool I2 = slice(insn, 11, 11) == S; +  unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 0) << 1; +  if (S) Imm25 |= 1 << 24; +  if (I1) Imm25 |= 1 << 23; +  if (I2) Imm25 |= 1 << 22; + +  return SignExtend32<25>(Imm25); +} + +static inline int decodeImm32_BLX(uint32_t insn) { +  unsigned S = slice(insn, 26, 26); +  bool I1 = slice(insn, 13, 13) == S; +  bool I2 = slice(insn, 11, 11) == S; +  unsigned Imm25 = slice(insn, 25, 16) << 12 | slice(insn, 10, 1) << 2; +  if (S) Imm25 |= 1 << 24; +  if (I1) Imm25 |= 1 << 23; +  if (I2) Imm25 |= 1 << 22; + +  return SignExtend32<25>(Imm25); +} + +// See, for example, A8.6.221 SXTAB16. +static inline unsigned decodeRotate(uint32_t insn) { +  unsigned rotate = slice(insn, 5, 4); +  return rotate << 3; +} + +/////////////////////////////////////////////// +//                                           // +// Thumb1 instruction disassembly functions. // +//                                           // +/////////////////////////////////////////////// + +// See "Utilities for 16-bit Thumb instructions" for register naming convention. + +// A6.2.1 Shift (immediate), add, subtract, move, and compare +// +// shift immediate:         tRd CPSR tRn imm5 +// add/sub register:        tRd CPSR tRn tRm +// add/sub 3-bit immediate: tRd CPSR tRn imm3 +// add/sub 8-bit immediate: tRt CPSR tRt(TIED_TO) imm8 +// mov/cmp immediate:       tRt [CPSR] imm8 (CPSR present for mov) +// +// Special case: +// tMOVSr:                  tRd tRn +static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID +         && "Invalid arguments"); + +  bool Imm3 = (Opcode == ARM::tADDi3 || Opcode == ARM::tSUBi3); + +  // Use Rt implies use imm8. +  bool UseRt = (Opcode == ARM::tADDi8 || Opcode == ARM::tSUBi8 || +                Opcode == ARM::tMOVi8 || Opcode == ARM::tCMPi8); + +  // Add the destination operand. +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(ARM::tGPRRegClassID, +                                  UseRt ? getT1tRt(insn) : getT1tRd(insn)))); +  ++OpIdx; + +  // Check whether the next operand to be added is a CCR Register. 
+  if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { +    assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); +    MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); +    ++OpIdx; +  } + +  // Check whether the next operand to be added is a Thumb1 Register. +  assert(OpIdx < NumOps && "More operands expected"); +  if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { +    // For UseRt, the reg operand is tied to the first reg operand. +    MI.addOperand(MCOperand::CreateReg( +                    getRegisterEnum(ARM::tGPRRegClassID, +                                    UseRt ? getT1tRt(insn) : getT1tRn(insn)))); +    ++OpIdx; +  } + +  // Special case for tMOVSr. +  if (OpIdx == NumOps) +    return true; + +  // The next available operand is either a reg operand or an imm operand. +  if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { +    // Three register operand instructions. +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                       getT1tRm(insn)))); +  } else { +    assert(OpInfo[OpIdx].RegClass == 0 && +           !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() +           && "Pure imm operand expected"); +    MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn) +                                             : (Imm3 ? getT1Imm3(insn) +                                                     : getT1Imm5(insn)))); +  } +  ++OpIdx; + +  return true; +} + +// A6.2.2 Data-processing +// +// tCMPr, tTST, tCMN: tRd tRn +// tMVN, tRSB:        tRd CPSR tRn +// Others:            tRd CPSR tRd(TIED_TO) tRn +static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && +         (OpInfo[1].RegClass == ARM::CCRRegClassID +          || OpInfo[1].RegClass == ARM::tGPRRegClassID) +         && "Invalid arguments"); + +  // Add the destination operand. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRd(insn)))); +  ++OpIdx; + +  // Check whether the next operand to be added is a CCR Register. +  if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { +    assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); +    MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); +    ++OpIdx; +  } + +  // We have either { tRd(TIED_TO), tRn } or { tRn } remaining. +  // Process the TIED_TO operand first. + +  assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID +         && "Thumb reg operand expected"); +  int Idx; +  if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { +    // The reg operand is tied to the first reg operand. +    MI.addOperand(MI.getOperand(Idx)); +    ++OpIdx; +  } + +  // Process possible next reg operand. +  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { +    // Add tRn operand. 
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                       getT1tRn(insn)))); +    ++OpIdx; +  } + +  return true; +} + +// A6.2.3 Special data instructions and branch and exchange +// +// tADDhirr: Rd Rd(TIED_TO) Rm +// tCMPhir:  Rd Rm +// tMOVr, tMOVgpr2gpr, tMOVgpr2tgpr, tMOVtgpr2gpr: Rd|tRd Rm|tRn +// tBX_RET: 0 operand +// tBX_RET_vararg: Rm +// tBLXr_r9: Rm +static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  // tBX_RET has 0 operand. +  if (NumOps == 0) +    return true; + +  // BX/BLX has 1 reg operand: Rm. +  if (NumOps == 1) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       getT1Rm(insn)))); +    NumOpsAdded = 1; +    return true; +  } + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  // Add the destination operand. +  unsigned RegClass = OpInfo[OpIdx].RegClass; +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(RegClass, +                                  IsGPR(RegClass) ? getT1Rd(insn) +                                                  : getT1tRd(insn)))); +  ++OpIdx; + +  // We have either { Rd(TIED_TO), Rm } or { Rm|tRn } remaining. +  // Process the TIED_TO operand first. + +  assert(OpIdx < NumOps && "More operands expected"); +  int Idx; +  if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { +    // The reg operand is tied to the first reg operand. +    MI.addOperand(MI.getOperand(Idx)); +    ++OpIdx; +  } + +  // The next reg operand is either Rm or tRn. +  assert(OpIdx < NumOps && "More operands expected"); +  RegClass = OpInfo[OpIdx].RegClass; +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(RegClass, +                                  IsGPR(RegClass) ? getT1Rm(insn) +                                                  : getT1tRn(insn)))); +  ++OpIdx; + +  return true; +} + +// A8.6.59 LDR (literal) +// +// tLDRpci: tRt imm8*4 +static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && +         (OpInfo[1].RegClass == 0 && +          !OpInfo[1].isPredicate() && +          !OpInfo[1].isOptionalDef()) +         && "Invalid arguments"); + +  // Add the destination operand. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRt(insn)))); + +  // And the (imm8 << 2) operand. 
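+  // For example, 0x4804 ("ldr r0, [pc, #16]") has imm8 = 4, so the immediate
+  // operand added here is 16.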
+  MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn) << 2)); + +  NumOpsAdded = 2; + +  return true; +} + +// Thumb specific addressing modes (see ARMInstrThumb.td): +// +// t_addrmode_rr := reg + reg +// +// t_addrmode_s4 := reg + reg +//                  reg + imm5 * 4 +// +// t_addrmode_s2 := reg + reg +//                  reg + imm5 * 2 +// +// t_addrmode_s1 := reg + reg +//                  reg + imm5 +// +// t_addrmode_sp := sp + imm8 * 4 +// + +// A6.2.4 Load/store single data item +// +// Load/Store Register (reg|imm):      tRd tRn imm5 tRm +// Load Register Signed Byte|Halfword: tRd tRn tRm +static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  // Table A6-5 16-bit Thumb Load/store instructions +  // opA = 0b0101 for STR/LDR (register) and friends. +  // Otherwise, we have STR/LDR (immediate) and friends. +  bool Imm5 = (opA != 5); + +  assert(NumOps >= 2 +         && OpInfo[0].RegClass == ARM::tGPRRegClassID +         && OpInfo[1].RegClass == ARM::tGPRRegClassID +         && "Expect >= 2 operands and first two as thumb reg operands"); + +  // Add the destination reg and the base reg. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRd(insn)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRn(insn)))); +  OpIdx = 2; + +  // We have either { imm5, tRm } or { tRm } remaining. +  // Process the imm5 first.  Note that STR/LDR (register) should skip the imm5 +  // offset operand for t_addrmode_s[1|2|4]. + +  assert(OpIdx < NumOps && "More operands expected"); + +  if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() && +      !OpInfo[OpIdx].isOptionalDef()) { + +    MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0)); +    ++OpIdx; +  } + +  // The next reg operand is tRm, the offset. +  assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID +         && "Thumb reg operand expected"); +  MI.addOperand(MCOperand::CreateReg(Imm5 ? 
0 +                                          : getRegisterEnum(ARM::tGPRRegClassID, +                                                            getT1tRm(insn)))); +  ++OpIdx; + +  return true; +} + +// A6.2.4 Load/store single data item +// +// Load/Store Register SP relative: tRt ARM::SP imm8 +static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) +         && "Invalid opcode"); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 3 && +         OpInfo[0].RegClass == ARM::tGPRRegClassID && +         OpInfo[1].RegClass == ARM::GPRRegClassID && +         (OpInfo[2].RegClass == 0 && +          !OpInfo[2].isPredicate() && +          !OpInfo[2].isOptionalDef()) +         && "Invalid arguments"); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRt(insn)))); +  MI.addOperand(MCOperand::CreateReg(ARM::SP)); +  MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); +  NumOpsAdded = 3; +  return true; +} + +// Table A6-1 16-bit Thumb instruction encoding +// A8.6.10 ADR +// +// tADDrPCi: tRt imm8 +static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  assert(Opcode == ARM::tADDrPCi && "Invalid opcode"); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && +         (OpInfo[1].RegClass == 0 && +          !OpInfo[1].isPredicate() && +          !OpInfo[1].isOptionalDef()) +         && "Invalid arguments"); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRt(insn)))); +  MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); +  NumOpsAdded = 2; +  return true; +} + +// Table A6-1 16-bit Thumb instruction encoding +// A8.6.8 ADD (SP plus immediate) +// +// tADDrSPi: tRt ARM::SP imm8 +static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  assert(Opcode == ARM::tADDrSPi && "Invalid opcode"); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 3 && +         OpInfo[0].RegClass == ARM::tGPRRegClassID && +         OpInfo[1].RegClass == ARM::GPRRegClassID && +         (OpInfo[2].RegClass == 0 && +          !OpInfo[2].isPredicate() && +          !OpInfo[2].isOptionalDef()) +         && "Invalid arguments"); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRt(insn)))); +  MI.addOperand(MCOperand::CreateReg(ARM::SP)); +  MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); +  NumOpsAdded = 3; +  return true; +} + +// tPUSH, tPOP: Pred-Imm Pred-CCR register_list +// +// where register_list = low registers + [lr] for PUSH or +//                       low registers + [pc] for POP +// +// "low registers" is specified by Inst{7-0} +// lr|pc is specified by Inst{8} +static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Invalid opcode"); + +  unsigned &OpIdx = NumOpsAdded; + +  // Handling the two predicate operands before the reglist. 
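+  // (As an illustration: for the tPUSH encoding 0xB5F0, "push {r4-r7, lr}",
+  // Inst{8} = 1 maps to LR and Inst{7-0} = 0xF0 selects r4-r7, so the reglist
+  // filled in below becomes r4, r5, r6, r7, lr.)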
+  MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); +  MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); +  OpIdx = 2; + +  // Fill the variadic part of reglist. +  unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15) +    | slice(insn, 7, 0); +  for (unsigned i = 0; i < 16; ++i) { +    if ((RegListBits >> i) & 1) { +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                         i))); +      ++OpIdx; +    } +  } + +  return true; +} + +// A6.2.5 Miscellaneous 16-bit instructions +// Delegate to DisassembleThumb1PushPop() for tPUSH & tPOP. +// +// tADDspi, tSUBspi: ARM::SP ARM::SP(TIED_TO) imm7 +// t2IT:             firstcond=Inst{7-4} mask=Inst{3-0} +// tCBNZ, tCBZ:      tRd imm6*2 +// tBKPT:            imm8 +// tNOP, tSEV, tYIELD, tWFE, tWFI: +//   no operand (except predicate pair) +// tSETENDBE, tSETENDLE, : +//   no operand +// Others:           tRd tRn +static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  if (NumOps == 0) +    return true; + +  if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) +    return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded); + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  // Predicate operands are handled elsewhere. +  if (NumOps == 2 && +      OpInfo[0].isPredicate() && OpInfo[1].isPredicate() && +      OpInfo[0].RegClass == 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { +    return true; +  } + +  if (Opcode == ARM::tADDspi || Opcode == ARM::tSUBspi) { +    // Special case handling for tADDspi and tSUBspi. +    // A8.6.8 ADD (SP plus immediate) & A8.6.215 SUB (SP minus immediate) +    MI.addOperand(MCOperand::CreateReg(ARM::SP)); +    MI.addOperand(MCOperand::CreateReg(ARM::SP)); +    MI.addOperand(MCOperand::CreateImm(getT1Imm7(insn))); +    NumOpsAdded = 3; +    return true; +  } + +  if (Opcode == ARM::t2IT) { +    // Special case handling for If-Then. +    // A8.6.50 IT +    // Tag the (firstcond[0] bit << 4) along with mask. + +    // firstcond +    MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 4))); + +    // firstcond[0] and mask +    MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); +    NumOpsAdded = 2; +    return true; +  } + +  if (Opcode == ARM::tBKPT) { +    MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); // breakpoint value +    NumOpsAdded = 1; +    return true; +  } + +  // CPS has a singleton $opt operand that contains the following information: +  // opt{4-0} = don't care +  // opt{5} = 0 (false) +  // opt{8-6} = AIF from Inst{2-0} +  // opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable +  if (Opcode == ARM::tCPS) { +    unsigned Option = slice(insn, 2, 0) << 6 | slice(insn, 4, 4) << 9 | 1 << 10; +    MI.addOperand(MCOperand::CreateImm(Option)); +    NumOpsAdded = 1; +    return true; +  } + +  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && +         (OpInfo[1].RegClass==0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) +         && "Expect >=2 operands"); + +  // Add the destination operand. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                     getT1tRd(insn)))); + +  if (OpInfo[1].RegClass == ARM::tGPRRegClassID) { +    // Two register instructions. 
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                       getT1tRn(insn)))); +  } else { +    // CBNZ, CBZ +    assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) && "Invalid opcode"); +    MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2)); +  } + +  NumOpsAdded = 2; + +  return true; +} + +// A8.6.53  LDM / LDMIA +// A8.6.189 STM / STMIA +// +// tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list +// tLDM:              tRt AM4ModeImm Pred-Imm Pred-CCR register_list +static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD || +          Opcode == ARM::tSTM_UPD) && "Invalid opcode"); + +  unsigned &OpIdx = NumOpsAdded; + +  unsigned tRt = getT1tRt(insn); +  unsigned RegListBits = slice(insn, 7, 0); + +  OpIdx = 0; + +  // WB register, if necessary. +  if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       tRt))); +    ++OpIdx; +  } + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     tRt))); +  ++OpIdx; + +  // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1 +  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))); +  ++OpIdx; + +  // Handling the two predicate operands before the reglist. +  MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); +  MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); +  OpIdx += 2; + +  // Fill the variadic part of reglist. +  for (unsigned i = 0; i < 8; ++i) { +    if ((RegListBits >> i) & 1) { +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, +                                                         i))); +      ++OpIdx; +    } +  } + +  return true; +} + +static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { +  return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded); +} + +static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { +  return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded); +} + +// A8.6.16 B Encoding T1 +// cond = Inst{11-8} & imm8 = Inst{7-0} +// imm32 = SignExtend(imm8:'0', 32) +// +// tBcc: offset Pred-Imm Pred-CCR +// tSVC: imm8 Pred-Imm Pred-CCR +// tTRAP: 0 operand (early return) +static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  if (Opcode == ARM::tTRAP) +    return true; + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  assert(NumOps == 3 && OpInfo[0].RegClass == 0 && +         OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID +         && "Exactly 3 operands expected"); + +  unsigned Imm8 = getT1Imm8(insn); +  MI.addOperand(MCOperand::CreateImm( +                  Opcode == ARM::tBcc ? SignExtend32<9>(Imm8 << 1) + 4 +                                      : (int)Imm8)); + +  // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier(). 
+  NumOpsAdded = 1;
+
+  return true;
+}
+
+// A8.6.16 B Encoding T2
+// imm11 = Inst{10-0}
+// imm32 = SignExtend(imm11:'0', 32)
+//
+// tB: offset
+static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded) {
+
+  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+  assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected");
+
+  unsigned Imm11 = getT1Imm11(insn);
+
+  // When executing a Thumb instruction, PC reads as the address of the current
+  // instruction plus 4.  The assembler subtracts 4 from the difference between
+  // the branch instruction and the target address, so the disassembler has to
+  // add 4 to compensate.
+  MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1) + 4));
+
+  NumOpsAdded = 1;
+
+  return true;
+
+}
+
+// See A6.2 16-bit Thumb instruction encoding for instruction classes
+// corresponding to op.
+//
+// Table A6-1 16-bit Thumb instruction encoding (abridged)
+// op		Instruction or instruction class
+// ------	--------------------------------------------------------------------
+// 00xxxx	Shift (immediate), add, subtract, move, and compare on page A6-7
+// 010000	Data-processing on page A6-8
+// 010001	Special data instructions and branch and exchange on page A6-9
+// 01001x	Load from Literal Pool, see LDR (literal) on page A8-122
+// 0101xx	Load/store single data item on page A6-10
+// 011xxx
+// 100xxx
+// 10100x	Generate PC-relative address, see ADR on page A8-32
+// 10101x	Generate SP-relative address, see ADD (SP plus immediate) on page A8-28
+// 1011xx	Miscellaneous 16-bit instructions on page A6-11
+// 11000x	Store multiple registers, see STM / STMIA / STMEA on page A8-374
+// 11001x	Load multiple registers, see LDM / LDMIA / LDMFD on page A8-110
+// 1101xx	Conditional branch, and Supervisor Call on page A6-13
+// 11100x	Unconditional Branch, see B on page A8-44
+//
+static bool DisassembleThumb1(uint16_t op,
+    MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) {
+
+  unsigned op1 = slice(op, 5, 4);
+  unsigned op2 = slice(op, 3, 2);
+  unsigned op3 = slice(op, 1, 0);
+  unsigned opA = slice(op, 5, 2);
+  switch (op1) {
+  case 0:
+    // A6.2.1 Shift (immediate), add, subtract, move, and compare
+    return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded,
+                                    Builder);
+  case 1:
+    switch (op2) {
+    case 0:
+      switch (op3) {
+      case 0:
+        // A6.2.2 Data-processing
+        return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded,
+                                   Builder);
+      case 1:
+        // A6.2.3 Special data instructions and branch and exchange
+        return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded);
+      default:
+        // A8.6.59 LDR (literal)
+        return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded);
+      }
+      break;
+    default:
+      // A6.2.4 Load/store single data item
+      return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded);
+      break;
+    }
+    break;
+  case 2:
+    switch (op2) {
+    case 0:
+      // A6.2.4 Load/store single data item
+      return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded);
+    case 1:
+      // A6.2.4 Load/store single data item
+      return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded);
+    case 2:
+      if (op3 <= 1) {
+        // A8.6.10 ADR
+        return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded); +      } else { +        // A8.6.8 ADD (SP plus immediate) +        return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded); +      } +    default: +      // A6.2.5 Miscellaneous 16-bit instructions +      return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded); +    } +    break; +  case 3: +    switch (op2) { +    case 0: +      if (op3 <= 1) { +        // A8.6.189 STM / STMIA / STMEA +        return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded); +      } else { +        // A8.6.53 LDM / LDMIA / LDMFD +        return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded); +      } +    case 1: +      // A6.2.6 Conditional branch, and Supervisor Call +      return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded); +    case 2: +      // Unconditional Branch, see B on page A8-44 +      return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded); +    default: +      assert(0 && "Unreachable code"); +      break; +    } +    break; +  default: +    assert(0 && "Unreachable code"); +    break; +  } + +  return false; +} + +/////////////////////////////////////////////// +//                                           // +// Thumb2 instruction disassembly functions. // +//                                           // +/////////////////////////////////////////////// + +/////////////////////////////////////////////////////////// +//                                                       // +// Note: the register naming follows the ARM convention! // +//                                                       // +/////////////////////////////////////////////////////////// + +static inline bool Thumb2SRSOpcode(unsigned Opcode) { +  switch (Opcode) { +  default: +    return false; +  case ARM::t2SRSDBW: case ARM::t2SRSDB: +  case ARM::t2SRSIAW: case ARM::t2SRSIA: +    return true; +  } +} + +static inline bool Thumb2RFEOpcode(unsigned Opcode) { +  switch (Opcode) { +  default: +    return false; +  case ARM::t2RFEDBW: case ARM::t2RFEDB: +  case ARM::t2RFEIAW: case ARM::t2RFEIA: +    return true; +  } +} + +// t2SRS[IA|DB]W/t2SRS[IA|DB]: mode_imm = Inst{4-0} +static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { +  MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); +  NumOpsAdded = 1; +  return true; +} + +// t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn +static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  NumOpsAdded = 1; +  return true; +} + +static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  if (Thumb2SRSOpcode(Opcode)) +    return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded); + +  if (Thumb2RFEOpcode(Opcode)) +    return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded); + +  assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD || +          Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD) +         && "Invalid opcode"); +  assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); + +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + +  // Writeback to base. 
+  if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) { +    MI.addOperand(MCOperand::CreateReg(Base)); +    ++OpIdx; +  } + +  MI.addOperand(MCOperand::CreateReg(Base)); +  ++OpIdx; + +  ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); +  MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); +  ++OpIdx; + +  // Handling the two predicate operands before the reglist. +  MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); +  MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); +  OpIdx += 2; + +  // Fill the variadic part of reglist. +  unsigned RegListBits = insn & ((1 << 16) - 1); +  for (unsigned i = 0; i < 16; ++i) { +    if ((RegListBits >> i) & 1) { +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                         i))); +      ++OpIdx; +    } +  } + +  return true; +} + +// t2LDREX: Rd Rn +// t2LDREXD: Rd Rs Rn +// t2LDREXB, t2LDREXH: Rd Rn +// t2STREX: Rs Rd Rn +// t2STREXD: Rm Rd Rs Rn +// t2STREXB, t2STREXH: Rm Rd Rn +static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 2 +         && OpInfo[0].RegClass == ARM::GPRRegClassID +         && OpInfo[1].RegClass == ARM::GPRRegClassID +         && "Expect >=2 operands and first two as reg operands"); + +  bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH); +  bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX); +  bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD); + +  // Add the destination operand for store. +  if (isStore) { +    MI.addOperand(MCOperand::CreateReg( +                    getRegisterEnum(ARM::GPRRegClassID, +                                    isSW ? decodeRs(insn) : decodeRm(insn)))); +    ++OpIdx; +  } + +  // Source operand for store and destination operand for load. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  ++OpIdx; + +  // Thumb2 doubleword complication: with an extra source/destination operand. +  if (isDW) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRs(insn)))); +    ++OpIdx; +  } + +  // Finally add the pointer operand. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  ++OpIdx; + +  return true; +} + +// LLVM, as of Jan-05-2010, does not output <Rt2>, i.e., Rs, in the asm. +// Whereas the ARM Arch. Manual does not require that t2 = t+1 like in ARM ISA. +// +// t2LDRDi8: Rd Rs Rn imm8s4 (offset mode) +// t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version) +// t2STRDi8: Rd Rs Rn imm8s4 (offset mode) +// +// Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for +// disassembly only and do not have a tied_to writeback base register operand. 
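+// For example, an encoding with imm8 = 2 and U = 0 produces an offset operand
+// of -8.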
+static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 4 +         && OpInfo[0].RegClass == ARM::GPRRegClassID +         && OpInfo[1].RegClass == ARM::GPRRegClassID +         && OpInfo[2].RegClass == ARM::GPRRegClassID +         && OpInfo[3].RegClass == 0 +         && "Expect >= 4 operands and first 3 as reg operands"); + +  // Add the <Rt> <Rt2> operands. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRs(insn)))); +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); + +  // Finally add (+/-)imm8*4, depending on the U bit. +  int Offset = getImm8(insn) * 4; +  if (getUBit(insn) == 0) +    Offset = -Offset; +  MI.addOperand(MCOperand::CreateImm(Offset)); +  NumOpsAdded = 4; + +  return true; +} + +// PC-based defined for Codegen, which do not get decoded by design: +// +// t2TBB, t2TBH: Rm immDontCare immDontCare +// +// Generic version defined for disassembly: +// +// t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR +static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  assert(NumOps >= 2 && "Expect >= 2 operands"); + +  // The generic version of TBB/TBH needs a base register. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); +  // Add the index register. +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRm(insn)))); +  NumOpsAdded = 2; + +  return true; +} + +static inline bool Thumb2ShiftOpcode(unsigned Opcode) { +  switch (Opcode) { +  default: +    return false; +  case ARM::t2MOVCClsl: case ARM::t2MOVCClsr: +  case ARM::t2MOVCCasr: case ARM::t2MOVCCror: +  case ARM::t2LSLri:    case ARM::t2LSRri: +  case ARM::t2ASRri:    case ARM::t2RORri: +    return true; +  } +} + +// A6.3.11 Data-processing (shifted register) +// +// Two register operands (Rn=0b1111 no 1st operand reg): Rs Rm +// Two register operands (Rs=0b1111 no dst operand reg): Rn Rm +// Three register operands: Rs Rn Rm +// Three register operands: (Rn=0b1111 Conditional Move) Rs Ro(TIED_TO) Rm +// +// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 +// register with shift forms: (Rm, ConstantShiftSpecifier). +// Constant shift specifier: Imm = (ShOp | ShAmt<<3). +// +// There are special instructions, like t2MOVsra_flag and t2MOVsrl_flag, which +// only require two register operands: Rd, Rm in ARM Reference Manual terms, and +// nothing else, because the shift amount is already specified. +// Similar case holds for t2MOVrx, t2ADDrr, ..., etc. +static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  // Special case handling. 
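+  // t2BR_JT is the jump-table branch pseudo-instruction: only the source
+  // register can be recovered from the encoding, so the remaining jump-table
+  // operands are filled in with zeros below.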
+  if (Opcode == ARM::t2BR_JT) {
+    assert(NumOps == 4
+           && OpInfo[0].RegClass == ARM::GPRRegClassID
+           && OpInfo[1].RegClass == ARM::GPRRegClassID
+           && OpInfo[2].RegClass == 0
+           && OpInfo[3].RegClass == 0
+           && "Exactly 4 operands expected and first two as reg operands");
+    // Only need to populate the src reg operand.
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                       decodeRm(insn))));
+    MI.addOperand(MCOperand::CreateReg(0));
+    MI.addOperand(MCOperand::CreateImm(0));
+    MI.addOperand(MCOperand::CreateImm(0));
+    NumOpsAdded = 4;
+    return true;
+  }
+
+  OpIdx = 0;
+
+  assert(NumOps >= 2
+         && OpInfo[0].RegClass == ARM::GPRRegClassID
+         && OpInfo[1].RegClass == ARM::GPRRegClassID
+         && "Expect >= 2 operands and first two as reg operands");
+
+  bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID);
+  bool NoDstReg = (decodeRs(insn) == 0xF);
+
+  // Build the register operands, followed by the constant shift specifier.
+
+  MI.addOperand(MCOperand::CreateReg(
+                  getRegisterEnum(ARM::GPRRegClassID,
+                                  NoDstReg ? decodeRn(insn) : decodeRs(insn))));
+  ++OpIdx;
+
+  if (ThreeReg) {
+    int Idx;
+    if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+      // Process tied_to operand constraint.
+      MI.addOperand(MI.getOperand(Idx));
+    } else {
+      assert(!NoDstReg && "Internal error");
+      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                         decodeRn(insn))));
+    }
+    ++OpIdx;
+  }
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                     decodeRm(insn))));
+  ++OpIdx;
+
+  if (NumOps == OpIdx)
+    return true;
+
+  if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+      && !OpInfo[OpIdx].isOptionalDef()) {
+
+    if (Thumb2ShiftOpcode(Opcode))
+      MI.addOperand(MCOperand::CreateImm(getShiftAmtBits(insn)));
+    else {
+      // Build the constant shift specifier operand.
+      unsigned bits2 = getShiftTypeBits(insn);
+      unsigned imm5 = getShiftAmtBits(insn);
+      ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
+      unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
+
+      // PKHBT/PKHTB are special in that we need the decodeImmShift() call to
+      // decode the shift amount from raw imm5 and bits2, but we DO NOT need
+      // to encode the ShOp, as it's in the asm string already.
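+      //
+      // For instance (illustrative values only): bits2 = 0b10 with imm5 = 4
+      // decodes as asr #4; for PKHTB only the amount (4) is emitted, while the
+      // other opcodes get the packed ARM_AM::getSORegOpc(ShOp, ShAmt) immediate.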
+      if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB) +        MI.addOperand(MCOperand::CreateImm(ShAmt)); +      else +        MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt))); +    } +    ++OpIdx; +  } + +  return true; +} + +// A6.3.1 Data-processing (modified immediate) +// +// Two register operands: Rs Rn ModImm +// One register operands (Rs=0b1111 no explicit dest reg): Rn ModImm +// One register operands (Rn=0b1111 no explicit src reg): Rs ModImm - {t2MOVi, t2MVNi} +// +// ModImm = ThumbExpandImm(i:imm3:imm8) +static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID +         && "Expect >= 2 operands and first one as reg operand"); + +  bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); +  bool NoDstReg = (decodeRs(insn) == 0xF); + +  // Build the register operands, followed by the modified immediate. + +  MI.addOperand(MCOperand::CreateReg( +                  getRegisterEnum(ARM::GPRRegClassID, +                                  NoDstReg ? decodeRn(insn) : decodeRs(insn)))); +  ++OpIdx; + +  if (TwoReg) { +    assert(!NoDstReg && "Internal error"); +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  // The modified immediate operand should come next. +  assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && +         !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() +         && "Pure imm operand expected"); + +  // i:imm3:imm8 +  // A6.3.2 Modified immediate constants in Thumb instructions +  unsigned imm12 = getIImm3Imm8(insn); +  MI.addOperand(MCOperand::CreateImm(ThumbExpandImm(imm12))); +  ++OpIdx; + +  return true; +} + +static inline bool Thumb2SaturateOpcode(unsigned Opcode) { +  switch (Opcode) { +  case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16: +  case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16: +    return true; +  default: +    return false; +  } +} + +static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { +  switch (Opcode) { +  case ARM::t2SSATlsl: +  case ARM::t2SSATasr: +    return slice(insn, 4, 0) + 1; +  case ARM::t2SSAT16: +    return slice(insn, 3, 0) + 1; +  case ARM::t2USATlsl: +  case ARM::t2USATasr: +    return slice(insn, 4, 0); +  case ARM::t2USAT16: +    return slice(insn, 3, 0); +  default: +    assert(0 && "Invalid opcode passed in"); +    return 0; +  } +} + +// A6.3.3 Data-processing (plain binary immediate) +// +// o t2ADDri12, t2SUBri12: Rs Rn imm12 +// o t2LEApcrel (ADR): Rs imm12 +// o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm +// o t2BFI (BFI) (Currently not defined in LLVM as of Jan-07-2010) +// o t2MOVi16: Rs imm16 +// o t2MOVTi16: Rs imm16 +// o t2SBFX (SBFX): Rs Rn lsb width +// o t2UBFX (UBFX): Rs Rn lsb width +// o t2BFI (BFI): Rs Rn lsb width +// +// [Signed|Unsigned] Saturate [16] +// +// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt +// o t2SSAT16, t2USAT16: Rs sat_pos Rn +static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx 
= 0; + +  assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID +         && "Expect >= 2 operands and first one as reg operand"); + +  bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID); + +  // Build the register operand(s), followed by the immediate(s). + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRs(insn)))); +  ++OpIdx; + +  // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd. +  if (Thumb2SaturateOpcode(Opcode)) { +    MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); + +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); + +    if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { +      OpIdx += 2; +      return true; +    } + +    // For SSAT operand reg (Rn) has been disassembled above. +    // Now disassemble the shift amount. + +    // Inst{14-12:7-6} encodes the imm5 shift amount. +    unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6); + +    MI.addOperand(MCOperand::CreateImm(ShAmt)); + +    OpIdx += 3; +    return true; +  } + +  if (TwoReg) { +    assert(NumOps >= 3 && "Expect >= 3 operands"); +    int Idx; +    if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { +      // Process tied_to operand constraint. +      MI.addOperand(MI.getOperand(Idx)); +    } else { +      // Add src reg operand. +      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                         decodeRn(insn)))); +    } +    ++OpIdx; +  } + +  assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() +         && !OpInfo[OpIdx].isOptionalDef() +         && "Pure imm operand expected"); + +  // Pre-increment OpIdx. 
+  ++OpIdx; + +  if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 +      || Opcode == ARM::t2LEApcrel) +    MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); +  else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) +    MI.addOperand(MCOperand::CreateImm(getImm16(insn))); +  else if (Opcode == ARM::t2BFC) +    MI.addOperand(MCOperand::CreateImm(getBitfieldInvMask(insn))); +  else { +    // Handle the case of: lsb width +    assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || +            Opcode == ARM::t2BFI) && "Invalid opcode"); +    MI.addOperand(MCOperand::CreateImm(getLsb(insn))); +    if (Opcode == ARM::t2BFI) { +      assert(getMsb(insn) >= getLsb(insn) && "Encoding error"); +      MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); +    } else +      MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); + +    ++OpIdx; +  } + +  return true; +} + +// A6.3.4 Table A6-15 Miscellaneous control instructions +// A8.6.41 DMB +// A8.6.42 DSB +// A8.6.49 ISB +static inline bool t2MiscCtrlInstr(uint32_t insn) { +  if (slice(insn, 31, 20) == 0xf3b && slice(insn, 15, 14) == 2 && +      slice(insn, 12, 12) == 0) +    return true; + +  return false; +} + +// A6.3.4 Branches and miscellaneous control +// +// A8.6.16 B +// Branches: t2B, t2Bcc -> imm operand +// +// Branches: t2TPsoft -> no operand +// +// A8.6.23 BL, BLX (immediate) +// Branches (defined in ARMInstrThumb.td): tBLr9, tBLXi_r9 -> imm operand +// +// A8.6.26 +// t2BXJ -> Rn +// +// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants), +// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX +//   -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) +// +// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV +//   -> no operand (except pred-imm pred-ccr) +// +// t2DBG -> imm4 = Inst{3-0} +// +// t2MRS/t2MRSsys -> Rs +// t2MSR/t2MSRsys -> Rn mask=Inst{11-8} +// t2SMC -> imm4 = Inst{19-16} +static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  if (NumOps == 0) +    return true; + +  if (t2MiscCtrlInstr(insn)) +    return true; + +  switch (Opcode) { +  case ARM::t2CLREX: +  case ARM::t2NOP: +  case ARM::t2YIELD: +  case ARM::t2WFE: +  case ARM::t2WFI: +  case ARM::t2SEV: +    return true; +  default: +    break; +  } + +  // CPS has a singleton $opt operand that contains the following information: +  // opt{4-0} = mode from Inst{4-0} +  // opt{5} = changemode from Inst{8} +  // opt{8-6} = AIF from Inst{7-5} +  // opt{10-9} = imod from Inst{10-9} with 0b10 as enable and 0b11 as disable +  if (Opcode == ARM::t2CPS) { +    unsigned Option = slice(insn, 4, 0) | slice(insn, 8, 8) << 5 | +      slice(insn, 7, 5) << 6 | slice(insn, 10, 9) << 9; +    MI.addOperand(MCOperand::CreateImm(Option)); +    NumOpsAdded = 1; +    return true; +  } + +  // DBG has its option specified in Inst{3-0}. +  if (Opcode == ARM::t2DBG) { +    MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); +    NumOpsAdded = 1; +    return true; +  } + +  // MRS and MRSsys take one GPR reg Rs. +  if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRs(insn)))); +    NumOpsAdded = 1; +    return true; +  } +  // BXJ takes one GPR reg Rn. 
+  if (Opcode == ARM::t2BXJ) {
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                       decodeRn(insn))));
+    NumOpsAdded = 1;
+    return true;
+  }
+  // MSR and MSRsys take one GPR reg Rn, followed by the mask.
+  if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys) {
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                       decodeRn(insn))));
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8)));
+    NumOpsAdded = 2;
+    return true;
+  }
+  // SMC takes imm4.
+  if (Opcode == ARM::t2SMC) {
+    MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16)));
+    NumOpsAdded = 1;
+    return true;
+  }
+
+  // Add the imm operand.
+  int Offset = 0;
+
+  switch (Opcode) {
+  default:
+    assert(0 && "Unreachable code");
+    return false;
+  case ARM::t2B:
+    Offset = decodeImm32_B_EncodingT4(insn);
+    break;
+  case ARM::t2Bcc:
+    Offset = decodeImm32_B_EncodingT3(insn);
+    break;
+  case ARM::tBLr9:
+    Offset = decodeImm32_BL(insn);
+    break;
+  case ARM::tBLXi_r9:
+    Offset = decodeImm32_BLX(insn);
+    break;
+  }
+  // When executing a Thumb instruction, PC reads as the address of the current
+  // instruction plus 4.  The assembler subtracts 4 from the difference between
+  // the branch instruction and the target address, so the disassembler has to
+  // add 4 to compensate.
+  MI.addOperand(MCOperand::CreateImm(Offset + 4));
+
+  NumOpsAdded = 1;
+
+  return true;
+}
+
+static inline bool Thumb2PreloadOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return false;
+  case ARM::t2PLDi12:   case ARM::t2PLDi8:   case ARM::t2PLDpci:
+  case ARM::t2PLDr:     case ARM::t2PLDs:
+  case ARM::t2PLDWi12:  case ARM::t2PLDWi8:  case ARM::t2PLDWpci:
+  case ARM::t2PLDWr:    case ARM::t2PLDWs:
+  case ARM::t2PLIi12:   case ARM::t2PLIi8:   case ARM::t2PLIpci:
+  case ARM::t2PLIr:     case ARM::t2PLIs:
+    return true;
+  }
+}
+
+static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
+    unsigned short NumOps, unsigned &NumOpsAdded) {
+
+  // Preload Data/Instruction requires either 2 or 3 operands.
+  // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8
+  // t2PLDr:                      Rn Rm
+  // t2PLDs:                      Rn Rm imm2=Inst{5-4}
+  // Same pattern applies for t2PLDW* and t2PLI*.
+
+  const TargetInstrDesc &TID = ARMInsts[Opcode];
+  const TargetOperandInfo *OpInfo = TID.OpInfo;
+  unsigned &OpIdx = NumOpsAdded;
+
+  OpIdx = 0;
+
+  assert(NumOps >= 2 &&
+         OpInfo[0].RegClass == ARM::GPRRegClassID &&
+         "Expect >= 2 operands and first one as reg operand");
+
+  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                     decodeRn(insn))));
+  ++OpIdx;
+
+  if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) {
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,
+                                                       decodeRm(insn))));
+  } else {
+    assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+           && !OpInfo[OpIdx].isOptionalDef()
+           && "Pure imm operand expected");
+    int Offset = 0;
+    if (Opcode == ARM::t2PLDpci || Opcode == ARM::t2PLDWpci ||
+        Opcode == ARM::t2PLIpci) {
+      bool Negative = slice(insn, 23, 23) == 0;
+      unsigned Imm12 = getImm12(insn);
+      Offset = Negative ? 
-1 - Imm12 : 1 * Imm12;       +    } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || +               Opcode == ARM::t2PLIi8) { +      // A8.6.117 Encoding T2: add = FALSE +      unsigned Imm8 = getImm8(insn); +      Offset = -1 - Imm8; +    } else // The i12 forms.  See, for example, A8.6.117 Encoding T1. +      Offset = decodeImm12(insn); +    MI.addOperand(MCOperand::CreateImm(Offset)); +  } +  ++OpIdx; + +  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && +      !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { +    // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs. +    MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4))); +    ++OpIdx; +  } + +  return true; +} + +// A8.6.63 LDRB (literal) +// A8.6.79 LDRSB (literal) +// A8.6.75 LDRH (literal) +// A8.6.83 LDRSH (literal) +// A8.6.59 LDR (literal) +// +// These instrs calculate an address from the PC value and an immediate offset. +// Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1) +static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 2 && +         OpInfo[0].RegClass == ARM::GPRRegClassID && +         OpInfo[1].RegClass == 0 && +         "Expect >= 2 operands, first as reg, and second as imm operand"); + +  // Build the register operand, followed by the (+/-)imm12 immediate. + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRd(insn)))); + +  MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); + +  NumOpsAdded = 2; + +  return true; +} + +// A6.3.10 Store single data item +// A6.3.9 Load byte, memory hints +// A6.3.8 Load halfword, memory hints +// A6.3.7 Load word +// +// For example, +// +// t2LDRi12:   Rd Rn (+)imm12 +// t2LDRi8:    Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) +// t2LDRs:     Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg) +// t2LDR_POST: Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) +// t2LDR_PRE:  Rd Rn Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) +// +// t2STRi12:   Rd Rn (+)imm12 +// t2STRi8:    Rd Rn (+/-)imm8 (+ if Inst{9} == 0b1) +// t2STRs:     Rd Rn Rm ConstantShiftSpecifier (see also DisassembleThumb2DPSoReg) +// t2STR_POST: Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) +// t2STR_PRE:  Rn Rd Rn(TIED_TO) (+/-)imm8 (+ if Inst{9} == 0b1) +// +// Note that for indexed modes, the Rn(TIED_TO) operand needs to be populated +// correctly, as LLVM AsmPrinter depends on it.  For indexed stores, the first +// operand is Rn; for all the other instructions, Rd is the first operand. +// +// Delegates to DisassembleThumb2PreLoad() for preload data/instruction. +// Delegates to DisassembleThumb2Ldpci() for load * literal operations. +static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, +    uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + +  unsigned Rn = decodeRn(insn); + +  if (Thumb2PreloadOpcode(Opcode)) +    return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded); + +  // See, for example, A6.3.7 Load word: Table A6-18 Load word. 
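+  // When Rn is PC (0b1111) the encoding is the literal (PC-relative) form,
+  // e.g. "ldr.w rt, [pc, #imm12]", which is handled by the dedicated
+  // DisassembleThumb2Ldpci() routine rather than the generic path below.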
+  if (Load && Rn == 15) +    return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded); + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 3 && +         OpInfo[0].RegClass == ARM::GPRRegClassID && +         OpInfo[1].RegClass == ARM::GPRRegClassID && +         "Expect >= 3 operands and first two as reg operands"); + +  bool ThreeReg = (OpInfo[2].RegClass == ARM::GPRRegClassID); +  bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1; +  bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td + +  // Build the register operands, followed by the immediate. +  unsigned R0, R1, R2 = 0; +  unsigned Rd = decodeRd(insn); +  int Imm = 0; + +  if (!Load && TIED_TO) { +    R0 = Rn; +    R1 = Rd; +  } else { +    R0 = Rd; +    R1 = Rn; +  } +  if (ThreeReg) { +    if (TIED_TO) { +      R2 = Rn; +      Imm = decodeImm8(insn); +    } else { +      R2 = decodeRm(insn); +      // See, for example, A8.6.64 LDRB (register). +      // And ARMAsmPrinter::printT2AddrModeSoRegOperand(). +      // LSL is the default shift opc, and LLVM does not expect it to be encoded +      // as part of the immediate operand. +      // Imm = ARM_AM::getSORegOpc(ARM_AM::lsl, slice(insn, 5, 4)); +      Imm = slice(insn, 5, 4); +    } +  } else { +    if (Imm12) +      Imm = getImm12(insn); +    else +      Imm = decodeImm8(insn); +  } +   +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R0))); +  ++OpIdx; +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R1))); +  ++OpIdx; + +  if (ThreeReg) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,R2))); +    ++OpIdx; +  } + +  assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() +         && !OpInfo[OpIdx].isOptionalDef() +         && "Pure imm operand expected"); + +  MI.addOperand(MCOperand::CreateImm(Imm)); +  ++OpIdx; + +  return true; +} + +// A6.3.12 Data-processing (register) +// +// Two register operands [rotate]:   Rs Rm [rotation(= (rotate:'000'))] +// Three register operands only:     Rs Rn Rm +// Three register operands [rotate]: Rs Rn Rm [rotation(= (rotate:'000'))] +// +// Parallel addition and subtraction 32-bit Thumb instructions: Rs Rn Rm +// +// Miscellaneous operations: Rs [Rn] Rm +static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetInstrDesc &TID = ARMInsts[Opcode]; +  const TargetOperandInfo *OpInfo = TID.OpInfo; +  unsigned &OpIdx = NumOpsAdded; + +  OpIdx = 0; + +  assert(NumOps >= 2 && +         OpInfo[0].RegClass == ARM::GPRRegClassID && +         OpInfo[1].RegClass == ARM::GPRRegClassID && +         "Expect >= 2 operands and first two as reg operands"); + +  // Build the register operands, followed by the optional rotation amount. 
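+  // (The rotation amount, when present, is the two rotate bits scaled by 8 --
+  // i.e. 0, 8, 16 or 24 -- as used by the SXTA*/UXTA*-style extend opcodes.)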
+ +  bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRs(insn)))); +  ++OpIdx; + +  if (ThreeReg) { +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRn(insn)))); +    ++OpIdx; +  } + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRm(insn)))); +  ++OpIdx; + +  if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 +      && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { +    // Add the rotation amount immediate. +    MI.addOperand(MCOperand::CreateImm(decodeRotate(insn))); +    ++OpIdx; +  } + +  return true; +} + +// A6.3.16 Multiply, multiply accumulate, and absolute difference +// +// t2MLA, t2MLS, t2SMMLA, t2SMMLS: Rs Rn Rm Ra=Inst{15-12} +// t2MUL, t2SMMUL:                 Rs Rn Rm +// t2SMLA[BB|BT|TB|TT|WB|WT]:      Rs Rn Rm Ra=Inst{15-12} +// t2SMUL[BB|BT|TB|TT|WB|WT]:      Rs Rn Rm +// +// Dual halfword multiply: t2SMUAD[X], t2SMUSD[X], t2SMLAD[X], t2SMLSD[X]: +//   Rs Rn Rm Ra=Inst{15-12} +// +// Unsigned Sum of Absolute Differences [and Accumulate] +//    Rs Rn Rm [Ra=Inst{15-12}] +static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 3 && +         OpInfo[0].RegClass == ARM::GPRRegClassID && +         OpInfo[1].RegClass == ARM::GPRRegClassID && +         OpInfo[2].RegClass == ARM::GPRRegClassID && +         "Expect >= 3 operands and first three as reg operands"); + +  // Build the register operands. + +  bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRs(insn)))); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRm(insn)))); + +  if (FourReg) +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)))); + +  NumOpsAdded = FourReg ? 
4 : 3; + +  return true; +} + +// A6.3.17 Long multiply, long multiply accumulate, and divide +// +// t2SMULL, t2UMULL, t2SMLAL, t2UMLAL, t2UMAAL: RdLo RdHi Rn Rm +// where RdLo = Inst{15-12} and RdHi = Inst{11-8} +// +// Halfword multiple accumulate long: t2SMLAL<x><y>: RdLo RdHi Rn Rm +// where RdLo = Inst{15-12} and RdHi = Inst{11-8} +// +// Dual halfword multiple: t2SMLALD[X], t2SMLSLD[X]: RdLo RdHi Rn Rm +// where RdLo = Inst{15-12} and RdHi = Inst{11-8} +// +// Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm +static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + +  assert(NumOps >= 3 && +         OpInfo[0].RegClass == ARM::GPRRegClassID && +         OpInfo[1].RegClass == ARM::GPRRegClassID && +         OpInfo[2].RegClass == ARM::GPRRegClassID && +         "Expect >= 3 operands and first three as reg operands"); + +  bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; + +  // Build the register operands. + +  if (FourReg) +    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                       decodeRd(insn)))); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRs(insn)))); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRn(insn)))); + +  MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, +                                                     decodeRm(insn)))); + +  if (FourReg) +    NumOpsAdded = 4; +  else +    NumOpsAdded = 3; + +  return true; +} + +// See A6.3 32-bit Thumb instruction encoding for instruction classes +// corresponding to (op1, op2, op). +// +// Table A6-9 32-bit Thumb instruction encoding +// op1	op2		op	Instruction class, see +// ---	-------	--	------------------------------------------------------------ +// 01	00xx0xx	-	Load/store multiple on page A6-23 +// 		00xx1xx	-	Load/store dual, load/store exclusive, table branch on page A6-24 +// 		01xxxxx	-	Data-processing (shifted register) on page A6-31 +// 		1xxxxxx	-	Coprocessor instructions on page A6-40 +// 10	x0xxxxx	0	Data-processing (modified immediate) on page A6-15 +// 		x1xxxxx	0	Data-processing (plain binary immediate) on page A6-19 +// 		-		1	Branches and miscellaneous control on page A6-20 +// 11	000xxx0	-	Store single data item on page A6-30 +// 		001xxx0	-	Advanced SIMD element or structure load/store instructions on page A7-27 +// 		00xx001 -	Load byte, memory hints on page A6-28 +// 		00xx011	-	Load halfword, memory hints on page A6-26 +// 		00xx101	-	Load word on page A6-25 +// 		00xx111	-	UNDEFINED +// 		010xxxx	-	Data-processing (register) on page A6-33 +// 		0110xxx	-	Multiply, multiply accumulate, and absolute difference on page A6-38 +// 		0111xxx	-	Long multiply, long multiply accumulate, and divide on page A6-39 +// 		1xxxxxx	-	Coprocessor instructions on page A6-40 +// +static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, +    MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded) { + +  switch (op1) { +  case 1: +    if (slice(op2, 6, 5) == 0) { +      if (slice(op2, 2, 2) == 0) { +        // Load/store multiple. 
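+        // (This also covers the SRS/RFE system forms, which the helper
+        // recognizes and dispatches before the generic LDM/STM handling.)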
+        return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded); +      } + +      // Load/store dual, load/store exclusive, table branch, otherwise. +      assert(slice(op2, 2, 2) == 1 && "Encoding error"); +      if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) || +          (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) { +        // Load/store exclusive. +        return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded); +      } +      if (Opcode == ARM::t2LDRDi8 || +          Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST || +          Opcode == ARM::t2STRDi8 || +          Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) { +        // Load/store dual. +        return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded); +      } +      if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) { +        // Table branch. +        return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded); +      } +    } else if (slice(op2, 6, 5) == 1) { +      // Data-processing (shifted register). +      return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded); +    } + +    // FIXME: A6.3.18 Coprocessor instructions +    // But see ThumbDisassembler::getInstruction(). + +    break; +  case 2: +    if (op == 0) { +      if (slice(op2, 5, 5) == 0) { +        // Data-processing (modified immediate) +        return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded); +      } else { +        // Data-processing (plain binary immediate) +        return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded); +      } +    } else { +      // Branches and miscellaneous control on page A6-20. +      return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded); +    } + +    break; +  case 3: +    switch (slice(op2, 6, 5)) { +    case 0: +      // Load/store instructions... +      if (slice(op2, 0, 0) == 0) { +        if (slice(op2, 4, 4) == 0) { +          // Store single data item on page A6-30 +          return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded); +        } else { +          // FIXME: Advanced SIMD element or structure load/store instructions. +          // But see ThumbDisassembler::getInstruction(). +          ; +        } +      } else { +        // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word +        return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded); +      } +      break; +    case 1: +      if (slice(op2, 4, 4) == 0) { +        // A6.3.12 Data-processing (register) +        return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded); +      } else if (slice(op2, 3, 3) == 0) { +        // A6.3.16 Multiply, multiply accumulate, and absolute difference +        return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded); +      } else { +        // A6.3.17 Long multiply, long multiply accumulate, and divide +        return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded); +      } +      break; +    default: +      // FIXME: A6.3.18 Coprocessor instructions +      // But see ThumbDisassembler::getInstruction(). 
+      ; +      break; +    } + +    break; +  default: +    assert(0 && "Encoding error for Thumb2 instruction!"); +    break; +  } + +  return false; +} + +static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +    unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + +  uint16_t HalfWord = slice(insn, 31, 16); + +  if (HalfWord == 0) { +    // A6.2 16-bit Thumb instruction encoding +    // op = bits[15:10] +    uint16_t op = slice(insn, 15, 10); +    return DisassembleThumb1(op, MI, Opcode, insn, NumOps, NumOpsAdded, +                             Builder); +  } + +  unsigned bits15_11 = slice(HalfWord, 15, 11); + +  // A6.1 Thumb instruction set encoding +  assert((bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) && +         "Bits [15:11] of first halfword of a Thumb2 instruction out of range"); + +  // A6.3 32-bit Thumb instruction encoding +   +  uint16_t op1 = slice(HalfWord, 12, 11); +  uint16_t op2 = slice(HalfWord, 10, 4); +  uint16_t op = slice(insn, 15, 15); + +  return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded); +} diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index a8dd38cb362e..b7ed14abed78 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \                  ARMGenRegisterInfo.inc ARMGenInstrNames.inc \                  ARMGenInstrInfo.inc ARMGenAsmWriter.inc \                  ARMGenDAGISel.inc ARMGenSubtarget.inc \ -                ARMGenCodeEmitter.inc ARMGenCallingConv.inc +                ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ +		ARMGenDecoderTables.inc -DIRS = AsmPrinter AsmParser TargetInfo +DIRS = AsmPrinter AsmParser Disassembler TargetInfo  include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index ad98839ce262..b10c3f74c70a 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -37,7 +37,7 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,                                     unsigned DestReg, unsigned SrcReg,                                     const TargetRegisterClass *DestRC,                                     const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (DestRC == ARM::GPRRegisterClass) { @@ -98,7 +98,7 @@ void Thumb1InstrInfo::  storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                      unsigned SrcReg, bool isKill, int FI,                      const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    assert((RC == ARM::tGPRRegisterClass || @@ -125,7 +125,7 @@ void Thumb1InstrInfo::  loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                       unsigned DestReg, int FI,                       const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    assert((RC == ARM::tGPRRegisterClass || @@ -154,7 +154,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,    if (CSI.empty())      return false; -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); diff --git 
a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 99c38b1f4d4b..a5dfcb34f7bc 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -398,7 +398,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,    // off the frame pointer (if, for example, there are alloca() calls in    // the function, the offset will be negative. Use R12 instead since that's    // a call clobbered register that we know won't be used in Thumb1 mode. -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).      addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); @@ -685,8 +685,7 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {    unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();    unsigned NumBytes = MFI->getStackSize();    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); -  DebugLoc dl = (MBBI != MBB.end() ? -                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.    NumBytes = (NumBytes + 3) & ~3; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 55163f9b820c..de4605669315 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -41,7 +41,7 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,                                unsigned DestReg, unsigned SrcReg,                                const TargetRegisterClass *DestRC,                                const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (DestRC == ARM::GPRRegisterClass && @@ -66,7 +66,7 @@ void Thumb2InstrInfo::  storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                      unsigned SrcReg, bool isKill, int FI,                      const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { @@ -90,7 +90,7 @@ void Thumb2InstrInfo::  loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                       unsigned DestReg, int FI,                       const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 5d8310ec840b..94c6f80c0361 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -471,8 +471,7 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,    SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,                                    DAG.getNode(AlphaISD::GlobalRetAddr, -                                              DebugLoc::getUnknownLoc(), -                                              MVT::i64), +                                              DebugLoc(), MVT::i64),                                    SDValue());    switch (Outs.size()) {    default: @@ -740,8 +739,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {                               SA2, NULL, 0, 
MVT::i32, false, false, 0);    }    case ISD::RETURNADDR: -    return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(), -                       MVT::i64); +    return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);        //FIXME: implement    case ISD::FRAMEADDR:          break;    } diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index d539e082118b..ba403e22ce35 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -112,7 +112,7 @@ unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,                                        MachineBasicBlock *FBB,                              const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc argument -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 2 || Cond.size() == 0) &&            "Alpha branch conditions have two components!"); @@ -153,7 +153,7 @@ bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,      return false;    } -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    if (DestRC == Alpha::GPRCRegisterClass) { @@ -185,7 +185,7 @@ AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,    //     << FrameIdx << "\n";    //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg); -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    if (RC == Alpha::F4RCRegisterClass) @@ -211,7 +211,7 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,                                          const TargetRegisterClass *RC) const {    //cerr << "Trying to load " << getPrettyName(DestReg) << " to "    //     << FrameIdx << "\n"; -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    if (RC == Alpha::F4RCRegisterClass) @@ -398,7 +398,7 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {  void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,                                   MachineBasicBlock::iterator MI) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)      .addReg(Alpha::R31) diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp index 0c51bc554be9..34be470f03e3 100644 --- a/lib/Target/Alpha/AlphaLLRP.cpp +++ b/lib/Target/Alpha/AlphaLLRP.cpp @@ -49,7 +49,7 @@ namespace {        const TargetInstrInfo *TII = F.getTarget().getInstrInfo();        bool Changed = false;        MachineInstr* prev[3] = {0,0,0}; -      DebugLoc dl = DebugLoc::getUnknownLoc(); +      DebugLoc dl;        unsigned count = 0;        for (MachineFunction::iterator FI = F.begin(), FE = F.end();             FI != FE; ++FI) { diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 55eec3ae4450..16a23cc120fb 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -207,8 +207,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {    MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB    MachineBasicBlock::iterator MBBI = MBB.begin();    MachineFrameInfo *MFI = MF.getFrameInfo(); -  DebugLoc dl = (MBBI != MBB.end() ? 
-                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());    bool FP = hasFP(MF);    //handle GOP offset diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index 3fd5d4dc0bf1..cf1901ba917b 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -106,7 +106,7 @@ InsertBranch(MachineBasicBlock &MBB,               MachineBasicBlock *FBB,               const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc operand -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    // Shouldn't be a fall through.    assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -116,7 +116,7 @@ InsertBranch(MachineBasicBlock &MBB,    if (Cond.empty()) {      // Unconditional branch?      assert(!FBB && "Unconditional branch with multiple successors!"); -    BuildMI(&MBB, dl, get(BF::JUMPa)).addMBB(TBB); +    BuildMI(&MBB, DL, get(BF::JUMPa)).addMBB(TBB);      return 1;    } @@ -139,27 +139,27 @@ bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,                                       unsigned SrcReg,                                       const TargetRegisterClass *DestRC,                                       const TargetRegisterClass *SrcRC) const { -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (inClass(BF::ALLRegClass, DestReg, DestRC) &&        inClass(BF::ALLRegClass, SrcReg,  SrcRC)) { -    BuildMI(MBB, I, dl, get(BF::MOVE), DestReg).addReg(SrcReg); +    BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg);      return true;    }    if (inClass(BF::D16RegClass, DestReg, DestRC) &&        inClass(BF::D16RegClass, SrcReg,  SrcRC)) { -    BuildMI(MBB, I, dl, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0); +    BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0);      return true;    }    if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) &&        inClass(BF::DRegClass, DestReg, DestRC)) {      if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { -      BuildMI(MBB, I, dl, get(BF::MOVENCC_z), DestReg).addReg(SrcReg); -      BuildMI(MBB, I, dl, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0); +      BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg).addReg(SrcReg); +      BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);      } else { -      BuildMI(MBB, I, dl, get(BF::MOVECC_zext), DestReg).addReg(SrcReg); +      BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg).addReg(SrcReg);      }      return true;    } @@ -167,21 +167,21 @@ bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,    if (inClass(BF::AnyCCRegClass, DestReg, DestRC) &&        inClass(BF::DRegClass, SrcReg,  SrcRC)) {      if (inClass(BF::NotCCRegClass, DestReg, DestRC)) -      BuildMI(MBB, I, dl, get(BF::SETEQri_not), DestReg).addReg(SrcReg); +      BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg).addReg(SrcReg);      else -      BuildMI(MBB, I, dl, get(BF::MOVECC_nz), DestReg).addReg(SrcReg); +      BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg).addReg(SrcReg);      return true;    }    if (inClass(BF::NotCCRegClass, DestReg, DestRC) &&        inClass(BF::JustCCRegClass, SrcReg,  SrcRC)) { -    BuildMI(MBB, I, dl, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg); +    BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg);      return true;    }    if (inClass(BF::JustCCRegClass, DestReg, DestRC) &&       
 inClass(BF::NotCCRegClass, SrcReg,  SrcRC)) { -    BuildMI(MBB, I, dl, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg); +    BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg);      return true;    } @@ -197,8 +197,7 @@ BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,                                         bool isKill,                                         int FI,                                         const TargetRegisterClass *RC) const { -  DebugLoc DL = I != MBB.end() ? -    I->getDebugLoc() : DebugLoc::getUnknownLoc(); +  DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();    if (inClass(BF::DPRegClass, SrcReg, RC)) {      BuildMI(MBB, I, DL, get(BF::STORE32fi)) @@ -244,8 +243,7 @@ BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,                                          unsigned DestReg,                                          int FI,                                          const TargetRegisterClass *RC) const { -  DebugLoc DL = I != MBB.end() ? -    I->getDebugLoc() : DebugLoc::getUnknownLoc(); +  DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();    if (inClass(BF::DPRegClass, DestReg, RC)) {      BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)        .addFrameIndex(FI) diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 84dc9cac504a..6fd610fa3b53 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -384,9 +384,7 @@ void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {    MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB    MachineBasicBlock::iterator MBBI = MBB.begin();    MachineFrameInfo *MFI = MF.getFrameInfo(); -  DebugLoc dl = (MBBI != MBB.end() -                 ? MBBI->getDebugLoc() -                 : DebugLoc::getUnknownLoc()); +  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    int FrameSize = MFI->getStackSize();    if (FrameSize%4) { diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 86825c81861b..423da3b4dbeb 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -262,7 +262,7 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,    // we instruction select bitconvert i64 -> f64 as a noop for example, so our    // types have no specific meaning. 
-  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    if (DestRC == SPU::R8CRegisterClass) { @@ -317,7 +317,7 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,      llvm_unreachable("Unknown regclass!");    } -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    addFrameReference(BuildMI(MBB, MI, DL, get(opc))                      .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); @@ -351,7 +351,7 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,      llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");    } -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);  } @@ -553,7 +553,7 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,                             MachineBasicBlock *FBB,                             const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc argument -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    // Shouldn't be a fall through.    assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 2 || Cond.size() == 0) && diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index f3071f2e5697..ad034ebcd839 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -452,8 +452,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const    MachineBasicBlock::iterator MBBI = MBB.begin();    MachineFrameInfo *MFI = MF.getFrameInfo();    MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); -  DebugLoc dl = (MBBI != MBB.end() ? -                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    // Prepare for debug frame info.    bool hasDebugInfo = MMI && MMI->hasDebugInfo(); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index a7e8eb7d554f..01f317471ee7 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -106,7 +106,7 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {  /// instruction.  
void MBlazeInstrInfo::  insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    BuildMI(MBB, MI, DL, get(MBlaze::NOP));  } @@ -116,8 +116,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,               unsigned DestReg, unsigned SrcReg,               const TargetRegisterClass *DestRC,               const TargetRegisterClass *SrcRC) const { -  DebugLoc dl = DebugLoc::getUnknownLoc(); -  llvm::BuildMI(MBB, I, dl, get(MBlaze::ADD), DestReg) +  DebugLoc DL; +  llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)        .addReg(SrcReg).addReg(MBlaze::R0);    return true;  } @@ -126,8 +126,8 @@ void MBlazeInstrInfo::  storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                      unsigned SrcReg, bool isKill, int FI,                      const TargetRegisterClass *RC) const { -  DebugLoc dl = DebugLoc::getUnknownLoc(); -  BuildMI(MBB, I, dl, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill)) +  DebugLoc DL; +  BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))      .addImm(0).addFrameIndex(FI);  } @@ -135,8 +135,8 @@ void MBlazeInstrInfo::  loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                       unsigned DestReg, int FI,                       const TargetRegisterClass *RC) const { -  DebugLoc dl = DebugLoc::getUnknownLoc(); -  BuildMI(MBB, I, dl, get(MBlaze::LWI), DestReg) +  DebugLoc DL; +  BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)        .addImm(0).addFrameIndex(FI);  } @@ -185,11 +185,9 @@ unsigned MBlazeInstrInfo::  InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,               MachineBasicBlock *FBB,               const SmallVectorImpl<MachineOperand> &Cond) const { -  DebugLoc dl = DebugLoc::getUnknownLoc(); -    // Can only insert uncond branches so far.    assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); -  BuildMI(&MBB, dl, get(MBlaze::BRI)).addMBB(TBB); +  BuildMI(&MBB, DebugLoc(), get(MBlaze::BRI)).addMBB(TBB);    return 1;  } diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 6d528a2488cf..a12310a2bd32 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -302,8 +302,7 @@ emitPrologue(MachineFunction &MF) const {    MachineFrameInfo *MFI    = MF.getFrameInfo();    MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();    MachineBasicBlock::iterator MBBI = MBB.begin(); -  DebugLoc dl = (MBBI != MBB.end() ? -                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    // Get the right frame order for MBlaze.    adjustMBlazeStackFrame(MF); @@ -319,13 +318,13 @@ emitPrologue(MachineFunction &MF) const {    int RAOffset = MBlazeFI->getRAStackOffset();    // Adjust stack : addi R1, R1, -imm -  BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) +  BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADDI), MBlaze::R1)        .addReg(MBlaze::R1).addImm(-StackSize);    // Save the return address only if the function isnt a leaf one.    
// swi  R15, R1, stack_loc    if (MFI->hasCalls()) { -    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) +    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))          .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);    } @@ -333,11 +332,11 @@ emitPrologue(MachineFunction &MF) const {    // to point to the stack pointer    if (hasFP(MF)) {      // swi  R19, R1, stack_loc -    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) +    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))        .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1);      // add R19, R1, R0 -    BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R19) +    BuildMI(MBB, MBBI, DL, TII.get(MBlaze::ADD), MBlaze::R19)        .addReg(MBlaze::R1).addReg(MBlaze::R0);    }  } diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index e584770dd497..03819041067c 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -33,7 +33,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,                                            MachineBasicBlock::iterator MI,                                      unsigned SrcReg, bool isKill, int FrameIdx,                                      const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    MachineFunction &MF = *MBB.getParent();    MachineFrameInfo &MFI = *MF.getFrameInfo(); @@ -60,7 +60,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,                                             MachineBasicBlock::iterator MI,                                             unsigned DestReg, int FrameIdx,                                             const TargetRegisterClass *RC) const{ -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    MachineFunction &MF = *MBB.getParent();    MachineFrameInfo &MFI = *MF.getFrameInfo(); @@ -86,7 +86,7 @@ bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,                                     unsigned DestReg, unsigned SrcReg,                                     const TargetRegisterClass *DestRC,                                     const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (DestRC == SrcRC) { @@ -134,7 +134,7 @@ MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,    if (CSI.empty())      return false; -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    MachineFunction &MF = *MBB.getParent(); @@ -158,7 +158,7 @@ MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,    if (CSI.empty())      return false; -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    for (unsigned i = 0, e = CSI.size(); i != e; ++i) @@ -323,7 +323,7 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,                                MachineBasicBlock *FBB,                              const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc operand -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    // Shouldn't be a fall through.    
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); @@ -333,18 +333,18 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,    if (Cond.empty()) {      // Unconditional branch?      assert(!FBB && "Unconditional branch with multiple successors!"); -    BuildMI(&MBB, dl, get(MSP430::JMP)).addMBB(TBB); +    BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(TBB);      return 1;    }    // Conditional branch.    unsigned Count = 0; -  BuildMI(&MBB, dl, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm()); +  BuildMI(&MBB, DL, get(MSP430::JCC)).addMBB(TBB).addImm(Cond[0].getImm());    ++Count;    if (FBB) {      // Two-way Conditional branch. Insert the second branch. -    BuildMI(&MBB, dl, get(MSP430::JMP)).addMBB(FBB); +    BuildMI(&MBB, DL, get(MSP430::JMP)).addMBB(FBB);      ++Count;    }    return Count; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index daac68324cb3..d91783a80c83 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -283,8 +283,7 @@ void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {    MachineFrameInfo *MFI = MF.getFrameInfo();    MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();    MachineBasicBlock::iterator MBBI = MBB.begin(); -  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() : -                 DebugLoc::getUnknownLoc()); +  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    // Get the number of bytes to allocate from the FrameInfo.    uint64_t StackSize = MFI->getStackSize(); diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 85cf064b9f52..dbd3c24dc1cd 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -123,7 +123,7 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const  void MipsInstrInfo::  insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const   { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    BuildMI(MBB, MI, DL, get(Mips::NOP));  } @@ -133,7 +133,7 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,               unsigned DestReg, unsigned SrcReg,               const TargetRegisterClass *DestRC,               const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc(); @@ -191,7 +191,7 @@ void MipsInstrInfo::  storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                      unsigned SrcReg, bool isKill, int FI,                       const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (RC == Mips::CPURegsRegisterClass)  @@ -225,7 +225,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                       unsigned DestReg, int FI,                       const TargetRegisterClass *RC) const   { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (RC == Mips::CPURegsRegisterClass)  @@ -523,7 +523,7 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,               MachineBasicBlock *FBB,               const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc argument -  DebugLoc dl = 
DebugLoc::getUnknownLoc(); +  DebugLoc dl;    // Shouldn't be a fall through.    assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) && diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f3c87bc5554a..f43e69b35457 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -397,8 +397,7 @@ emitPrologue(MachineFunction &MF) const    MachineFrameInfo *MFI    = MF.getFrameInfo();    MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();    MachineBasicBlock::iterator MBBI = MBB.begin(); -  DebugLoc dl = (MBBI != MBB.end() ? -                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);    // Get the right frame order for Mips. diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index da4e02787691..a223540170a7 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -256,15 +256,11 @@ void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {  ///  void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,                                      const DebugLoc &DL, bool IsInBeginFunction) { -  if (! EmitDebugDirectives) return; -  assert (! DL.isUnknown()  && "can't change to invalid debug loc"); - -  DILocation Loc = MF.getDILocation(DL); -  MDNode *CU = Loc.getScope().getNode(); -  unsigned line = Loc.getLineNumber(); +  if (!EmitDebugDirectives) return; +  assert(!DL.isUnknown() && "can't change to invalid debug loc"); -  SwitchToCU(CU); -  SwitchToLine(line, IsInBeginFunction); +  SwitchToCU(DL.getScope(MF.getFunction()->getContext())); +  SwitchToLine(DL.getLine(), IsInBeginFunction);  }  /// SwitchToLine - Emit line directive for a new line. 
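The MBlaze, MSP430, Mips and PIC16 hunks above (and the PowerPC, Sparc, SystemZ and X86 ones that follow) apply the same mechanical substitution: a default-constructed DebugLoc now denotes the unknown location, so the explicit DebugLoc::getUnknownLoc() call disappears while the location is still borrowed from the insertion point when one exists. A minimal C++ sketch of the resulting idiom follows; the helper name insertWithNearestLoc and the generic Opcode parameter are illustrative placeholders, not part of this patch.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

// Build an instruction at MI, reusing the debug location of the neighbouring
// instruction when the insertion point is not at the end of the block.
static void insertWithNearestLoc(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 const TargetInstrInfo &TII,
                                 unsigned Opcode) {
  DebugLoc DL;                      // default-constructed == unknown location
  if (MI != MBB.end())
    DL = MI->getDebugLoc();         // borrow the neighbour's location
  BuildMI(MBB, MI, DL, TII.get(Opcode));
}

The prologue and epilogue emitters use the same pattern in expression form, e.g. DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();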
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index da16e8383c31..365e8b20b7a1 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -72,7 +72,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,                                           unsigned SrcReg, bool isKill, int FI,                                           const TargetRegisterClass *RC) const {    PIC16TargetLowering *PTLI = TM.getTargetLowering(); -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    const Function *Func = MBB.getParent()->getFunction(); @@ -114,7 +114,7 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,                                            unsigned DestReg, int FI,                                            const TargetRegisterClass *RC) const {    PIC16TargetLowering *PTLI = TM.getTargetLowering(); -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    const Function *Func = MBB.getParent()->getFunction(); @@ -154,7 +154,7 @@ bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,                                     unsigned DestReg, unsigned SrcReg,                                     const TargetRegisterClass *DestRC,                                     const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (DestRC == PIC16::FSR16RegisterClass) { @@ -202,7 +202,7 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,    if (FBB == 0) { // One way branch.      if (Cond.empty()) {        // Unconditional branch? -      DebugLoc dl = DebugLoc::getUnknownLoc(); +      DebugLoc dl;        BuildMI(&MBB, dl, get(PIC16::br_uncond)).addMBB(TBB);      }      return 1; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 4f88d35deaf8..1e323849d1ef 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -215,7 +215,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {    const TargetInstrInfo &TII = *TM.getInstrInfo();    MachineBasicBlock &EntryBB = *Fn.begin(); -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    // Emit the following code into the entry block:    // InVRSAVE = MFVRSAVE    // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE @@ -253,7 +253,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {      // Insert the set of GlobalBaseReg into the first MBB of the function      MachineBasicBlock &FirstMBB = MF->front();      MachineBasicBlock::iterator MBBI = FirstMBB.begin(); -    DebugLoc dl = DebugLoc::getUnknownLoc(); +    DebugLoc dl;      if (PPCLowering.getPointerTy() == MVT::i32) {        GlobalBaseReg = RegInfo->createVirtualRegister(PPC::GPRCRegisterClass); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index e67666d80481..dda530eef434 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1122,7 +1122,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,      // With PIC, the first instruction is actually "GR+hi(&G)".      
Hi = DAG.getNode(ISD::ADD, dl, PtrVT,                       DAG.getNode(PPCISD::GlobalBaseReg, -                                 DebugLoc::getUnknownLoc(), PtrVT), Hi); +                                 DebugLoc(), PtrVT), Hi);    }    Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); @@ -1155,7 +1155,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {      // With PIC, the first instruction is actually "GR+hi(&G)".      Hi = DAG.getNode(ISD::ADD, dl, PtrVT,                       DAG.getNode(PPCISD::GlobalBaseReg, -                                 DebugLoc::getUnknownLoc(), PtrVT), Hi); +                                 DebugLoc(), PtrVT), Hi);    }    Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); @@ -1192,7 +1192,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {      // With PIC, the first instruction is actually "GR+hi(&G)".      Hi = DAG.getNode(ISD::ADD, DL, PtrVT,                       DAG.getNode(PPCISD::GlobalBaseReg, -                                 DebugLoc::getUnknownLoc(), PtrVT), Hi); +                                 DebugLoc(), PtrVT), Hi);    }    return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); @@ -1233,7 +1233,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,      // With PIC, the first instruction is actually "GR+hi(&G)".      Hi = DAG.getNode(ISD::ADD, dl, PtrVT,                       DAG.getNode(PPCISD::GlobalBaseReg, -                                 DebugLoc::getUnknownLoc(), PtrVT), Hi); +                                 DebugLoc(), PtrVT), Hi);    }    Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); @@ -5540,15 +5540,18 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {  }  /// getOptimalMemOpType - Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove lowering. -/// If DstAlign is zero that means it's safe to destination alignment can -/// satisfy any constraint. Similarly if SrcAlign is zero it means there -/// isn't a need to check it against alignment requirement, probably because -/// the source does not need to be loaded. It returns EVT::Other if -/// SelectionDAG should be responsible for determining it. +/// and store operations as a result of memset, memcpy, and memmove +/// lowering. If DstAlign is zero that means it's safe to destination +/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +/// means there isn't a need to check it against alignment requirement, +/// probably because the source does not need to be loaded. If +/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// non-scalar-integer type, e.g. empty string source, constant, or loaded +/// from memory. It returns EVT::Other if SelectionDAG should be responsible +/// for determining it.  
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,                                             unsigned DstAlign, unsigned SrcAlign, -                                           bool SafeToUseFP, +                                           bool NonScalarIntSafe,                                             SelectionDAG &DAG) const {    if (this->PPCSubTarget.isPPC64()) {      return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 19fefab2d3a0..f816bddaf5a3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -348,15 +348,19 @@ namespace llvm {      virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;      /// getOptimalMemOpType - Returns the target specific optimal type for load -    /// and store operations as a result of memset, memcpy, and memmove lowering. -    /// If DstAlign is zero that means it's safe to destination alignment can -    /// satisfy any constraint. Similarly if SrcAlign is zero it means there -    /// isn't a need to check it against alignment requirement, probably because -    /// the source does not need to be loaded. It returns EVT::Other if -    /// SelectionDAG should be responsible for determining it. -    virtual EVT getOptimalMemOpType(uint64_t Size, -                                    unsigned DstAlign, unsigned SrcAlign, -                                    bool SafeToUseFP, SelectionDAG &DAG) const; +    /// and store operations as a result of memset, memcpy, and memmove +    /// lowering. If DstAlign is zero that means it's safe to destination +    /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +    /// means there isn't a need to check it against alignment requirement, +    /// probably because the source does not need to be loaded. If +    /// 'NonScalarIntSafe' is true, that means it's safe to return a +    /// non-scalar-integer type, e.g. empty string source, constant, or loaded +    /// from memory. It returns EVT::Other if SelectionDAG should be responsible +    /// for determining it. +    virtual EVT +    getOptimalMemOpType(uint64_t Size, +                        unsigned DstAlign, unsigned SrcAlign, +                        bool NonScalarIntSafe, SelectionDAG &DAG) const;      /// getFunctionAlignment - Return the Log2 alignment of this function.      virtual unsigned getFunctionAlignment(const Function *F) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 82c637efaf25..6b0a282af09c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -199,7 +199,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {  void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,                                 MachineBasicBlock::iterator MI) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    BuildMI(MBB, MI, DL, get(PPC::NOP)); @@ -317,7 +317,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,                             MachineBasicBlock *FBB,                             const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc argument -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    // Shouldn't be a fall through.    
assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 2 || Cond.size() == 0) &&  @@ -350,7 +350,7 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,      return false;    } -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    if (DestRC == PPC::GPRCRegisterClass) { @@ -380,7 +380,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,                                    int FrameIdx,                                    const TargetRegisterClass *RC,                                    SmallVectorImpl<MachineInstr*> &NewMIs) const{ -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (RC == PPC::GPRCRegisterClass) {      if (SrcReg != PPC::LR) {        NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) @@ -635,7 +635,7 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,                                     const TargetRegisterClass *RC) const {    MachineFunction &MF = *MBB.getParent();    SmallVector<MachineInstr*, 4> NewMIs; -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);    for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 44c5fe672f86..9363aa8b7566 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1281,7 +1281,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {    MachineBasicBlock::iterator MBBI = MBB.begin();    MachineFrameInfo *MFI = MF.getFrameInfo();    MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||         !MF.getFunction()->doesNotThrow() ||         UnwindTablesMandatory; @@ -1521,7 +1521,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,                                     MachineBasicBlock &MBB) const {    MachineBasicBlock::iterator MBBI = prior(MBB.end());    unsigned RetOpcode = MBBI->getOpcode(); -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    assert( (RetOpcode == PPC::BLR ||             RetOpcode == PPC::TCRETURNri || diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 15b26c29872f..9e148ada8853 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -68,7 +68,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {      if (I->getDesc().hasDelaySlot()) {        MachineBasicBlock::iterator J = I;        ++J; -      BuildMI(MBB, J, DebugLoc::getUnknownLoc(), TII->get(SP::NOP)); +      BuildMI(MBB, J, DebugLoc(), TII->get(SP::NOP));        ++FilledSlots;        Changed = true;      } diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 8667bca7fe96..e494d7ddf9f8 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -111,7 +111,7 @@ SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,                               MachineBasicBlock *FBB,                               const SmallVectorImpl<MachineOperand> &Cond)const{    // FIXME this should probably take a DebugLoc argument -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    // Can only insert uncond branches so far.    
assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");    BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB); @@ -128,7 +128,7 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,      return false;    } -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (DestRC == SP::IntRegsRegisterClass) @@ -149,7 +149,7 @@ void SparcInstrInfo::  storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                      unsigned SrcReg, bool isKill, int FI,                      const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    // On the order of operands here: think "[FrameIdx + 0] = SrcReg". @@ -170,7 +170,7 @@ void SparcInstrInfo::  loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,                       unsigned DestReg, int FI,                       const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (RC == SP::IntRegsRegisterClass) @@ -253,7 +253,7 @@ unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const    GlobalBaseReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    BuildMI(FirstMBB, MBBI, dl, get(SP::GETPCX), GlobalBaseReg);    SparcFI->setGlobalBaseReg(GlobalBaseReg); diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 740e3bc14f2b..08373bb83869 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -125,8 +125,7 @@ void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const {    MachineBasicBlock &MBB = MF.front();    MachineFrameInfo *MFI = MF.getFrameInfo();    MachineBasicBlock::iterator MBBI = MBB.begin(); -  DebugLoc dl = (MBBI != MBB.end() ? -                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc();    // Get the number of bytes to allocate from the FrameInfo    int NumBytes = (int) MFI->getStackSize(); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 06f01e7b32b8..c92caa4e9ec0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -62,7 +62,7 @@ void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,                                            MachineBasicBlock::iterator MI,                                      unsigned SrcReg, bool isKill, int FrameIdx,                                      const TargetRegisterClass *RC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    unsigned Opc = 0; @@ -91,7 +91,7 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,                                             MachineBasicBlock::iterator MI,                                             unsigned DestReg, int FrameIdx,                                             const TargetRegisterClass *RC) const{ -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    unsigned Opc = 0; @@ -120,7 +120,7 @@ bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,                                      unsigned DestReg, unsigned SrcReg,                                      const TargetRegisterClass *DestRC,                                      const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    // Determine if DstRC and SrcRC have a common superclass. @@ -273,7 +273,7 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,    if (CSI.empty())      return false; -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    MachineFunction &MF = *MBB.getParent(); @@ -347,7 +347,7 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,    if (CSI.empty())      return false; -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    MachineFunction &MF = *MBB.getParent(); @@ -521,7 +521,7 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,                                 MachineBasicBlock *FBB,                              const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME: this should probably have a DebugLoc operand -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    // Shouldn't be a fall through.    assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 1 || Cond.size() == 0) && @@ -530,19 +530,19 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,    if (Cond.empty()) {      // Unconditional branch?      assert(!FBB && "Unconditional branch with multiple successors!"); -    BuildMI(&MBB, dl, get(SystemZ::JMP)).addMBB(TBB); +    BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(TBB);      return 1;    }    // Conditional branch.    unsigned Count = 0;    SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm(); -  BuildMI(&MBB, dl, getBrCond(CC)).addMBB(TBB); +  BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);    ++Count;    if (FBB) {      // Two-way Conditional branch. Insert the second branch. 
-    BuildMI(&MBB, dl, get(SystemZ::JMP)).addMBB(FBB); +    BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(FBB);      ++Count;    }    return Count; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index ca2fe6f920ba..302c418d1ec9 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -194,8 +194,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,      Chunk = (1LL << 15) - 1;    } -  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() : -                 DebugLoc::getUnknownLoc()); +  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    while (Offset) {      uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; @@ -215,8 +214,7 @@ void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {    SystemZMachineFunctionInfo *SystemZMFI =      MF.getInfo<SystemZMachineFunctionInfo>();    MachineBasicBlock::iterator MBBI = MBB.begin(); -  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() : -                 DebugLoc::getUnknownLoc()); +  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    // Get the number of bytes to allocate from the FrameInfo.    // Note that area for callee-saved stuff is already allocated, thus we need to diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 7b7b5cb4f574..7328dc0ba8d7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -21,8 +21,8 @@  #include "llvm/MC/MCDisassembler.h"  #include "llvm/MC/MCInst.h"  #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Debug.h"  #include "llvm/Support/MemoryObject.h" -#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/raw_ostream.h"  #include "X86GenRegisterNames.inc" @@ -30,6 +30,14 @@  using namespace llvm;  using namespace llvm::X86Disassembler; +void x86DisassemblerDebug(const char *file, +                          unsigned line, +                          const char *s) { +  dbgs() << file << ":" << line << ": " << s; +} + +#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); +  namespace llvm {    // Fill-ins to make the compiler happy.  These constants are never actually @@ -50,8 +58,8 @@ extern Target TheX86_32Target, TheX86_64Target;  } -static void translateInstruction(MCInst &target, -                                 InternalInstruction &source); +static bool translateInstruction(MCInst &target, +                                InternalInstruction &source);  X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :      MCDisassembler(), @@ -106,14 +114,13 @@ bool X86GenericDisassembler::getInstruction(MCInst &instr,                                address,                                fMode); -  if(ret) { +  if (ret) {      size = internalInstr.readerCursor - address;      return false;    }    else {      size = internalInstr.length; -    translateInstruction(instr, internalInstr); -    return true; +    return !translateInstruction(instr, internalInstr);    }  } @@ -151,29 +158,35 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate) {  /// @param mcInst       - The MCInst to append to.  /// @param insn         - The internal instruction to extract the R/M field  ///                       from. 
-static void translateRMRegister(MCInst &mcInst, +/// @return             - 0 on success; -1 otherwise +static bool translateRMRegister(MCInst &mcInst,                                  InternalInstruction &insn) { -  assert(insn.eaBase != EA_BASE_sib && insn.eaBase != EA_BASE_sib64 &&  -         "A R/M register operand may not have a SIB byte"); +  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { +    debug("A R/M register operand may not have a SIB byte"); +    return true; +  }    switch (insn.eaBase) { +  default: +    debug("Unexpected EA base register"); +    return true;    case EA_BASE_NONE: -    llvm_unreachable("EA_BASE_NONE for ModR/M base"); -    break; +    debug("EA_BASE_NONE for ModR/M base"); +    return true;  #define ENTRY(x) case EA_BASE_##x:    ALL_EA_BASES  #undef ENTRY -    llvm_unreachable("A R/M register operand may not have a base; " -                     "the operand must be a register."); -    break; -#define ENTRY(x)                                                        \ +    debug("A R/M register operand may not have a base; " +          "the operand must be a register."); +    return true; +#define ENTRY(x)                                                      \    case EA_REG_##x:                                                    \      mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;    ALL_REGS  #undef ENTRY -  default: -    llvm_unreachable("Unexpected EA base register");    } +   +  return false;  }  /// translateRMMemory - Translates a memory operand stored in the Mod and R/M @@ -186,7 +199,8 @@ static void translateRMRegister(MCInst &mcInst,  /// @param sr           - Whether or not to emit the segment register.  The  ///                       LEA instruction does not expect a segment-register  ///                       operand. 
-static void translateRMMemory(MCInst &mcInst, +/// @return             - 0 on success; nonzero otherwise +static bool translateRMMemory(MCInst &mcInst,                                InternalInstruction &insn,                                bool sr) {    // Addresses in an MCInst are represented as five operands: @@ -211,7 +225,8 @@ static void translateRMMemory(MCInst &mcInst,      if (insn.sibBase != SIB_BASE_NONE) {        switch (insn.sibBase) {        default: -        llvm_unreachable("Unexpected sibBase"); +        debug("Unexpected sibBase"); +        return true;  #define ENTRY(x)                                          \        case SIB_BASE_##x:                                  \          baseReg = MCOperand::CreateReg(X86::x); break; @@ -225,7 +240,8 @@ static void translateRMMemory(MCInst &mcInst,      if (insn.sibIndex != SIB_INDEX_NONE) {        switch (insn.sibIndex) {        default: -        llvm_unreachable("Unexpected sibIndex"); +        debug("Unexpected sibIndex"); +        return true;  #define ENTRY(x)                                          \        case SIB_INDEX_##x:                                 \          indexReg = MCOperand::CreateReg(X86::x); break; @@ -241,9 +257,10 @@ static void translateRMMemory(MCInst &mcInst,    } else {      switch (insn.eaBase) {      case EA_BASE_NONE: -      assert(insn.eaDisplacement != EA_DISP_NONE &&  -             "EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); -       +      if (insn.eaDisplacement == EA_DISP_NONE) { +        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); +        return true; +      }        if (insn.mode == MODE_64BIT)          baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6        else @@ -271,8 +288,8 @@ static void translateRMMemory(MCInst &mcInst,        indexReg = MCOperand::CreateReg(0);        switch (insn.eaBase) {        default: -        llvm_unreachable("Unexpected eaBase"); -        break; +        debug("Unexpected eaBase"); +        return true;          // Here, we will use the fill-ins defined above.  However,          //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and          //   sib and sib64 were handled in the top-level if, so they're only @@ -285,9 +302,9 @@ static void translateRMMemory(MCInst &mcInst,  #define ENTRY(x) case EA_REG_##x:        ALL_REGS  #undef ENTRY -        llvm_unreachable("A R/M memory operand may not be a register; " -                         "the base field must be a base."); -            break; +        debug("A R/M memory operand may not be a register; " +              "the base field must be a base."); +        return true;        }      } @@ -315,6 +332,8 @@ static void translateRMMemory(MCInst &mcInst,    if (sr)      mcInst.addOperand(segmentReg); +   +  return false;  }  /// translateRM - Translates an operand stored in the R/M (and possibly SIB) @@ -324,12 +343,14 @@ static void translateRMMemory(MCInst &mcInst,  /// @param operand      - The operand, as stored in the descriptor table.  /// @param insn         - The instruction to extract Mod, R/M, and SIB fields  ///                       from. 
-static void translateRM(MCInst &mcInst, -                        OperandSpecifier &operand, -                        InternalInstruction &insn) { +/// @return             - 0 on success; nonzero otherwise +static bool translateRM(MCInst &mcInst, +                       OperandSpecifier &operand, +                       InternalInstruction &insn) {    switch (operand.type) {    default: -    llvm_unreachable("Unexpected type for a R/M operand"); +    debug("Unexpected type for a R/M operand"); +    return true;    case TYPE_R8:    case TYPE_R16:    case TYPE_R32: @@ -345,8 +366,7 @@ static void translateRM(MCInst &mcInst,    case TYPE_DEBUGREG:    case TYPE_CR32:    case TYPE_CR64: -    translateRMRegister(mcInst, insn); -    break; +    return translateRMRegister(mcInst, insn);    case TYPE_M:    case TYPE_M8:    case TYPE_M16: @@ -364,11 +384,9 @@ static void translateRM(MCInst &mcInst,    case TYPE_M1616:    case TYPE_M1632:    case TYPE_M1664: -    translateRMMemory(mcInst, insn, true); -    break; +    return translateRMMemory(mcInst, insn, true);    case TYPE_LEA: -    translateRMMemory(mcInst, insn, false); -    break; +    return translateRMMemory(mcInst, insn, false);    }  } @@ -377,11 +395,17 @@ static void translateRM(MCInst &mcInst,  ///  /// @param mcInst       - The MCInst to append to.  /// @param stackPos     - The stack position to translate. -static void translateFPRegister(MCInst &mcInst, -                                uint8_t stackPos) { -  assert(stackPos < 8 && "Invalid FP stack position"); +/// @return             - 0 on success; nonzero otherwise. +static bool translateFPRegister(MCInst &mcInst, +                               uint8_t stackPos) { +  if (stackPos >= 8) { +    debug("Invalid FP stack position"); +    return true; +  }    mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); + +  return false;  }  /// translateOperand - Translates an operand stored in an internal instruction  @@ -390,25 +414,27 @@ static void translateFPRegister(MCInst &mcInst,  /// @param mcInst       - The MCInst to append to.  /// @param operand      - The operand, as stored in the descriptor table.  /// @param insn         - The internal instruction. -static void translateOperand(MCInst &mcInst, -                             OperandSpecifier &operand, -                             InternalInstruction &insn) { +/// @return             - false on success; true otherwise. 
+static bool translateOperand(MCInst &mcInst, +                            OperandSpecifier &operand, +                            InternalInstruction &insn) {    switch (operand.encoding) {    default: -    llvm_unreachable("Unhandled operand encoding during translation"); +    debug("Unhandled operand encoding during translation"); +    return true;    case ENCODING_REG:      translateRegister(mcInst, insn.reg); -    break; +    return false;    case ENCODING_RM: -    translateRM(mcInst, operand, insn); -    break; +    return translateRM(mcInst, operand, insn);    case ENCODING_CB:    case ENCODING_CW:    case ENCODING_CD:    case ENCODING_CP:    case ENCODING_CO:    case ENCODING_CT: -    llvm_unreachable("Translation of code offsets isn't supported."); +    debug("Translation of code offsets isn't supported."); +    return true;    case ENCODING_IB:    case ENCODING_IW:    case ENCODING_ID: @@ -417,24 +443,22 @@ static void translateOperand(MCInst &mcInst,    case ENCODING_Ia:      translateImmediate(mcInst,                          insn.immediates[insn.numImmediatesTranslated++]); -    break; +    return false;    case ENCODING_RB:    case ENCODING_RW:    case ENCODING_RD:    case ENCODING_RO:      translateRegister(mcInst, insn.opcodeRegister); -    break; +    return false;    case ENCODING_I: -    translateFPRegister(mcInst, insn.opcodeModifier); -    break; +    return translateFPRegister(mcInst, insn.opcodeModifier);    case ENCODING_Rv:      translateRegister(mcInst, insn.opcodeRegister); -    break; +    return false;    case ENCODING_DUP: -    translateOperand(mcInst, -                     insn.spec->operands[operand.type - TYPE_DUP0], -                     insn); -    break; +    return translateOperand(mcInst, +                            insn.spec->operands[operand.type - TYPE_DUP0], +                            insn);    }  } @@ -443,9 +467,13 @@ static void translateOperand(MCInst &mcInst,  ///  /// @param mcInst       - The MCInst to populate with the instruction's data.  /// @param insn         - The internal instruction. -static void translateInstruction(MCInst &mcInst, -                                 InternalInstruction &insn) {   -  assert(insn.spec); +/// @return             - false on success; true otherwise. 
+static bool translateInstruction(MCInst &mcInst, +                                InternalInstruction &insn) {   +  if (!insn.spec) { +    debug("Instruction has no specification"); +    return true; +  }    mcInst.setOpcode(insn.instructionID); @@ -454,9 +482,14 @@ static void translateInstruction(MCInst &mcInst,    insn.numImmediatesTranslated = 0;    for (index = 0; index < X86_MAX_OPERANDS; ++index) { -    if (insn.spec->operands[index].encoding != ENCODING_NONE)                 -      translateOperand(mcInst, insn.spec->operands[index], insn); +    if (insn.spec->operands[index].encoding != ENCODING_NONE) { +      if (translateOperand(mcInst, insn.spec->operands[index], insn)) { +        return true; +      } +    }    } +   +  return false;  }  static MCDisassembler *createX86_32Disassembler(const Target &T) { diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 4f02ed46e38b..db694bc2f3c5 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -13,7 +13,6 @@   *   *===----------------------------------------------------------------------===*/ -#include <assert.h>   /* for assert()     */  #include <stdarg.h>   /* for va_*()       */  #include <stdio.h>    /* for vsnprintf()  */  #include <stdlib.h>   /* for exit()       */ @@ -26,17 +25,20 @@  #define TRUE  1  #define FALSE 0 +typedef int8_t bool; +  #ifdef __GNUC__  #define NORETURN __attribute__((noreturn))  #else  #define NORETURN  #endif -#define unreachable(s)                                      \ -  do {                                                      \ -    fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, s);  \ -    exit(-1);                                               \ -  } while (0); +#ifndef NDEBUG +#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) +#else +#define debug(s) do { } while (0) +#endif +  /*   * contextForAttrs - Client for the instruction context table.  Takes a set of @@ -84,7 +86,6 @@ static int modRMRequired(OpcodeType type,    return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].      modrm_type != MODRM_ONEENTRY; -  unreachable("Unknown opcode type");    return 0;  } @@ -96,16 +97,18 @@ static int modRMRequired(OpcodeType type,   * @param insnContext - See modRMRequired().   * @param opcode      - See modRMRequired().   * @param modRM       - The ModR/M byte if required, or any value if not. + * @return            - The UID of the instruction, or 0 on failure.   */  static InstrUID decode(OpcodeType type, -                               InstructionContext insnContext, -                               uint8_t opcode, -                               uint8_t modRM) { +                       InstructionContext insnContext, +                       uint8_t opcode, +                       uint8_t modRM) {    struct ModRMDecision* dec;    switch (type) {    default: -    unreachable("Unknown opcode type"); +    debug("Unknown opcode type"); +    return 0;    case ONEBYTE:      dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];      break; @@ -122,7 +125,8 @@ static InstrUID decode(OpcodeType type,    switch (dec->modrm_type) {    default: -    unreachable("Corrupt table!  Unknown modrm_type"); +    debug("Corrupt table!  
Unknown modrm_type"); +    return 0;    case MODRM_ONEENTRY:      return dec->instructionIDs[0];    case MODRM_SPLITRM: @@ -133,8 +137,6 @@ static InstrUID decode(OpcodeType type,    case MODRM_FULL:      return dec->instructionIDs[modRM];    } -   -  return 0;  }  /* @@ -342,7 +344,8 @@ static int readPrefixes(struct InternalInstruction* insn) {          insn->segmentOverride = SEG_OVERRIDE_GS;          break;        default: -        unreachable("Unhandled override"); +        debug("Unhandled override"); +        return -1;        }        if (prefixGroups[1])          dbgprintf(insn, "Redundant Group 2 prefix"); @@ -376,7 +379,7 @@ static int readPrefixes(struct InternalInstruction* insn) {      if ((byte & 0xf0) == 0x40) {        uint8_t opcodeByte; -      if(lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { +      if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {          dbgprintf(insn, "Redundant REX prefix");          return -1;        } @@ -540,17 +543,17 @@ static int getIDWithAttrMask(uint16_t* instructionID,  static BOOL is16BitEquvalent(const char* orig, const char* equiv) {    off_t i; -  for(i = 0;; i++) { -    if(orig[i] == '\0' && equiv[i] == '\0') +  for (i = 0;; i++) { +    if (orig[i] == '\0' && equiv[i] == '\0')        return TRUE; -    if(orig[i] == '\0' || equiv[i] == '\0') +    if (orig[i] == '\0' || equiv[i] == '\0')        return FALSE; -    if(orig[i] != equiv[i]) { -      if((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') +    if (orig[i] != equiv[i]) { +      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')          continue; -      if((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') +      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')          continue; -      if((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') +      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')          continue;        return FALSE;      } @@ -567,17 +570,17 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) {  static BOOL is64BitEquivalent(const char* orig, const char* equiv) {    off_t i; -  for(i = 0;; i++) { -    if(orig[i] == '\0' && equiv[i] == '\0') +  for (i = 0;; i++) { +    if (orig[i] == '\0' && equiv[i] == '\0')        return TRUE; -    if(orig[i] == '\0' || equiv[i] == '\0') +    if (orig[i] == '\0' || equiv[i] == '\0')        return FALSE; -    if(orig[i] != equiv[i]) { -      if((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q') +    if (orig[i] != equiv[i]) { +      if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')          continue; -      if((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6') +      if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')          continue; -      if((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4') +      if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')          continue;        return FALSE;      } @@ -615,7 +618,7 @@ static int getID(struct InternalInstruction* insn) {    else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))      attrMask |= ATTR_XD; -  if(getIDWithAttrMask(&instructionID, insn, attrMask)) +  if (getIDWithAttrMask(&instructionID, insn, attrMask))      return -1;    /* The following clauses compensate for limitations of the tables. 
*/ @@ -792,7 +795,8 @@ static int readSIB(struct InternalInstruction* insn) {                         SIB_BASE_EBP : SIB_BASE_RBP);        break;      case 0x3: -      unreachable("Cannot have Mod = 0b11 and a SIB byte"); +      debug("Cannot have Mod = 0b11 and a SIB byte"); +      return -1;      }      break;    default: @@ -903,7 +907,7 @@ static int readModRM(struct InternalInstruction* insn) {        if (rm == 0x6) {          insn->eaBase = EA_BASE_NONE;          insn->eaDisplacement = EA_DISP_16; -        if(readDisplacement(insn)) +        if (readDisplacement(insn))            return -1;        } else {          insn->eaBase = (EABase)(insn->eaBaseBase + rm); @@ -913,18 +917,18 @@ static int readModRM(struct InternalInstruction* insn) {      case 0x1:        insn->eaBase = (EABase)(insn->eaBaseBase + rm);        insn->eaDisplacement = EA_DISP_8; -      if(readDisplacement(insn)) +      if (readDisplacement(insn))          return -1;        break;      case 0x2:        insn->eaBase = (EABase)(insn->eaBaseBase + rm);        insn->eaDisplacement = EA_DISP_16; -      if(readDisplacement(insn)) +      if (readDisplacement(insn))          return -1;        break;      case 0x3:        insn->eaBase = (EABase)(insn->eaRegBase + rm); -      if(readDisplacement(insn)) +      if (readDisplacement(insn))          return -1;        break;      } @@ -942,13 +946,13 @@ static int readModRM(struct InternalInstruction* insn) {          insn->eaBase = (insn->addressSize == 4 ?                           EA_BASE_sib : EA_BASE_sib64);          readSIB(insn); -        if(readDisplacement(insn)) +        if (readDisplacement(insn))            return -1;          break;        case 0x5:          insn->eaBase = EA_BASE_NONE;          insn->eaDisplacement = EA_DISP_32; -        if(readDisplacement(insn)) +        if (readDisplacement(insn))            return -1;          break;        default: @@ -964,12 +968,12 @@ static int readModRM(struct InternalInstruction* insn) {        case 0xc:   /* in case REXW.b is set */          insn->eaBase = EA_BASE_sib;          readSIB(insn); -        if(readDisplacement(insn)) +        if (readDisplacement(insn))            return -1;          break;        default:          insn->eaBase = (EABase)(insn->eaBaseBase + rm); -        if(readDisplacement(insn)) +        if (readDisplacement(insn))            return -1;          break;        } @@ -993,11 +997,13 @@ static int readModRM(struct InternalInstruction* insn) {      *valid = 1;                                           \      switch (type) {                                       \      default:                                              \ -      unreachable("Unhandled register type");             \ +      debug("Unhandled register type");                   \ +      *valid = 0;                                         \ +      return 0;                                           \      case TYPE_Rv:                                         \        return base + index;                                \      case TYPE_R8:                                         \ -      if(insn->rexPrefix &&                               \ +      if (insn->rexPrefix &&                              \           index >= 4 && index <= 7) {                      \          return prefix##_SPL + (index - 4);                \        } else {                                            \ @@ -1017,23 +1023,23 @@ static int readModRM(struct InternalInstruction* insn) {      case TYPE_MM64:                                       \      case 
TYPE_MM32:                                       \      case TYPE_MM:                                         \ -      if(index > 7)                                       \ +      if (index > 7)                                      \          *valid = 0;                                       \        return prefix##_MM0 + index;                        \      case TYPE_SEGMENTREG:                                 \ -      if(index > 5)                                       \ +      if (index > 5)                                      \          *valid = 0;                                       \        return prefix##_ES + index;                         \      case TYPE_DEBUGREG:                                   \ -      if(index > 7)                                       \ +      if (index > 7)                                      \          *valid = 0;                                       \        return prefix##_DR0 + index;                        \      case TYPE_CR32:                                       \ -      if(index > 7)                                       \ +      if (index > 7)                                      \          *valid = 0;                                       \        return prefix##_ECR0 + index;                       \      case TYPE_CR64:                                       \ -      if(index > 8)                                       \ +      if (index > 8)                                      \          *valid = 0;                                       \        return prefix##_RCR0 + index;                       \      }                                                     \ @@ -1050,6 +1056,7 @@ static int readModRM(struct InternalInstruction* insn) {   * @param index - The existing value of the field as reported by readModRM().   * @param valid - The address of a uint8_t.  The target is set to 1 if the   *                field is valid for the register class; 0 if not. + * @return      - The proper value.   */  GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)  GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG) @@ -1071,7 +1078,8 @@ static int fixupReg(struct InternalInstruction *insn,    switch ((OperandEncoding)op->encoding) {    default: -    unreachable("Expected a REG or R/M encoding in fixupReg"); +    debug("Expected a REG or R/M encoding in fixupReg"); +    return -1;    case ENCODING_REG:      insn->reg = (Reg)fixupRegValue(insn,                                     (OperandType)op->type, @@ -1102,26 +1110,29 @@ static int fixupReg(struct InternalInstruction *insn,   * @param insn    - The instruction whose opcode field is to be read.   * @param inModRM - Indicates that the opcode field is to be read from the   *                  ModR/M extension; useful for escape opcodes + * @return        - 0 on success; nonzero otherwise.   
*/ -static void readOpcodeModifier(struct InternalInstruction* insn) { +static int readOpcodeModifier(struct InternalInstruction* insn) {    dbgprintf(insn, "readOpcodeModifier()");    if (insn->consumedOpcodeModifier) -    return; +    return 0;    insn->consumedOpcodeModifier = TRUE; -  switch(insn->spec->modifierType) { +  switch (insn->spec->modifierType) {    default: -    unreachable("Unknown modifier type."); +    debug("Unknown modifier type."); +    return -1;    case MODIFIER_NONE: -    unreachable("No modifier but an operand expects one."); +    debug("No modifier but an operand expects one."); +    return -1;    case MODIFIER_OPCODE:      insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; -    break; +    return 0;    case MODIFIER_MODRM:      insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; -    break; +    return 0;    }    } @@ -1134,11 +1145,13 @@ static void readOpcodeModifier(struct InternalInstruction* insn) {   * @param size  - The width (in bytes) of the register being specified.   *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means   *                RAX. + * @return      - 0 on success; nonzero otherwise.   */ -static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { +static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {    dbgprintf(insn, "readOpcodeRegister()"); -  readOpcodeModifier(insn); +  if (readOpcodeModifier(insn)) +    return -1;    if (size == 0)      size = insn->registerSize; @@ -1147,9 +1160,9 @@ static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {    case 1:      insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)                                                     | insn->opcodeModifier)); -    if(insn->rexPrefix &&  -       insn->opcodeRegister >= MODRM_REG_AL + 0x4 && -       insn->opcodeRegister < MODRM_REG_AL + 0x8) { +    if (insn->rexPrefix &&  +        insn->opcodeRegister >= MODRM_REG_AL + 0x4 && +        insn->opcodeRegister < MODRM_REG_AL + 0x8) {        insn->opcodeRegister = (Reg)(MODRM_REG_SPL                                     + (insn->opcodeRegister - MODRM_REG_AL - 4));      } @@ -1161,7 +1174,7 @@ static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {                                      | insn->opcodeModifier));      break;    case 4: -    insn->opcodeRegister = (Reg)(MODRM_REG_EAX + +    insn->opcodeRegister = (Reg)(MODRM_REG_EAX                                   + ((bFromREX(insn->rexPrefix) << 3)                                       | insn->opcodeModifier));      break; @@ -1171,6 +1184,8 @@ static void readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {                                      | insn->opcodeModifier));      break;    } +   +  return 0;  }  /* @@ -1190,8 +1205,10 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {    dbgprintf(insn, "readImmediate()"); -  if (insn->numImmediatesConsumed == 2) -    unreachable("Already consumed two immediates"); +  if (insn->numImmediatesConsumed == 2) { +    debug("Already consumed two immediates"); +    return -1; +  }    if (size == 0)      size = insn->immediateSize; @@ -1274,29 +1291,35 @@ static int readOperands(struct InternalInstruction* insn) {          return -1;        break;      case ENCODING_Iv: -      readImmediate(insn, insn->immediateSize); -      break; +      if (readImmediate(insn, insn->immediateSize)) +        return -1;      case 
ENCODING_Ia: -      readImmediate(insn, insn->addressSize); +      if (readImmediate(insn, insn->addressSize)) +        return -1;        break;      case ENCODING_RB: -      readOpcodeRegister(insn, 1); +      if (readOpcodeRegister(insn, 1)) +        return -1;        break;      case ENCODING_RW: -      readOpcodeRegister(insn, 2); +      if (readOpcodeRegister(insn, 2)) +        return -1;        break;      case ENCODING_RD: -      readOpcodeRegister(insn, 4); +      if (readOpcodeRegister(insn, 4)) +        return -1;        break;      case ENCODING_RO: -      readOpcodeRegister(insn, 8); +      if (readOpcodeRegister(insn, 8)) +        return -1;        break;      case ENCODING_Rv: -      readOpcodeRegister(insn, 0); +      if (readOpcodeRegister(insn, 0)) +        return -1;        break;      case ENCODING_I: -      readOpcodeModifier(insn); -      break; +      if (readOpcodeModifier(insn)) +        return -1;      case ENCODING_DUP:        break;      default: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index c03c07a3895c..462cf68ccdb2 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -508,6 +508,17 @@ int decodeInstruction(struct InternalInstruction* insn,                        uint64_t startLoc,                        DisassemblerMode mode); +/* x86DisassemblerDebug - C-accessible function for printing a message to + *   debugs() + * @param file  - The name of the file printing the debug message. + * @param line  - The line number that printed the debug message. + * @param s     - The message to print. + */ +   +void x86DisassemblerDebug(const char *file, +                          unsigned line, +                          const char *s); +  #ifdef __cplusplus   }  #endif diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6b62795a6e14..89cc84ffb3c6 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -74,6 +74,8 @@ def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",  def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",                                            "HasVectorUAMem", "true",                   "Allow unaligned memory operands on vector/SIMD instructions">; +def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true", +                                      "Enable AES instructions">;  //===----------------------------------------------------------------------===//  // X86 processors supported. @@ -101,11 +103,17 @@ def : Proc<"nocona",          [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>;  def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit, FeatureSlowBTMem]>;  def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit, FeatureSlowBTMem]>;  def : Proc<"atom",            [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>; +// "Arrandale" along with corei3 and corei5  def : Proc<"corei7",          [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem, -                               FeatureFastUAMem]>; +                               FeatureFastUAMem, FeatureAES]>;  def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,                                 FeatureFastUAMem]>; +// Westmere is a similar machine to nehalem with some additional features. 
+// Westmere is the corei3/i5/i7 path from nehalem to sandybridge +def : Proc<"westmere",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem, +                                FeatureFastUAMem, FeatureAES]>;  // Sandy Bridge does not have FMA +// FIXME: Wikipedia says it does... it should have AES as well.  def : Proc<"sandybridge",     [FeatureSSE42,  FeatureAVX,   Feature64Bit]>;  def : Proc<"k6",              [FeatureMMX]>; diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 6a117dde9bde..541083f6fffd 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -129,7 +129,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {      }      // Finally, if we found any FP code, emit the FP_REG_KILL instruction.      if (ContainsFPCode) { -      BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc::getUnknownLoc(), +      BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),                MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));        ++NumFPKill;        Changed = true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 1c0ed7e6327c..da45dac807f6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -542,7 +542,7 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,                                               MachineFrameInfo *MFI) {    const TargetInstrInfo *TII = TM.getInstrInfo();    if (Subtarget->isTargetCygMing()) -    BuildMI(BB, DebugLoc::getUnknownLoc(), +    BuildMI(BB, DebugLoc(),              TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");  } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b24d5a1707a0..527e19b60882 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1071,18 +1071,21 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {  /// If DstAlign is zero that means it's safe to destination alignment can  /// satisfy any constraint. Similarly if SrcAlign is zero it means there  /// isn't a need to check it against alignment requirement, probably because -/// the source does not need to be loaded. It returns EVT::Other if -/// SelectionDAG should be responsible for determining it. +/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, that +/// means it's safe to return a non-scalar-integer type, e.g. constant string +/// source or loaded from memory. It returns EVT::Other if SelectionDAG should +/// be responsible for determining it.  EVT  X86TargetLowering::getOptimalMemOpType(uint64_t Size,                                         unsigned DstAlign, unsigned SrcAlign, -                                       bool SafeToUseFP, +                                       bool NonScalarIntSafe,                                         SelectionDAG &DAG) const {    // FIXME: This turns off use of xmm stores for memset/memcpy on targets like    // linux.  This is because the stack realignment code can't handle certain    // cases like PR2962.  This should be removed when PR2962 is fixed.    
const Function *F = DAG.getMachineFunction().getFunction(); -  if (!F->hasFnAttr(Attribute::NoImplicitFloat)) { +  if (NonScalarIntSafe && +      !F->hasFnAttr(Attribute::NoImplicitFloat)) {      if (Size >= 16 &&          (Subtarget->isUnalignedMemAccessFast() ||           ((DstAlign == 0 || DstAlign >= 16) && @@ -1090,10 +1093,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,          Subtarget->getStackAlignment() >= 16) {        if (Subtarget->hasSSE2())          return MVT::v4i32; -      if (SafeToUseFP && Subtarget->hasSSE1()) +      if (Subtarget->hasSSE1())          return MVT::v4f32; -    } else if (SafeToUseFP && -               Size >= 8 && +    } else if (Size >= 8 &&                 !Subtarget->is64Bit() &&                 Subtarget->getStackAlignment() >= 8 &&                 Subtarget->hasSSE2()) @@ -1147,8 +1149,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,    if (!Subtarget->is64Bit())      // This doesn't have DebugLoc associated with it, but is not really the      // same as a Register. -    return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(), -                       getPointerTy()); +    return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());    return Table;  } @@ -1929,8 +1930,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,      if (!isTailCall) {        Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,                                 DAG.getNode(X86ISD::GlobalBaseReg, -                                           DebugLoc::getUnknownLoc(), -                                           getPointerTy()), +                                           DebugLoc(), getPointerTy()),                                 InFlag);        InFlag = Chain.getValue(1);      } else { @@ -5059,7 +5059,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {    if (OpFlag) {      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),                           DAG.getNode(X86ISD::GlobalBaseReg, -                                     DebugLoc::getUnknownLoc(), getPointerTy()), +                                     DebugLoc(), getPointerTy()),                           Result);    } @@ -5092,7 +5092,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {    if (OpFlag) {      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),                           DAG.getNode(X86ISD::GlobalBaseReg, -                                     DebugLoc::getUnknownLoc(), getPointerTy()), +                                     DebugLoc(), getPointerTy()),                           Result);    } @@ -5128,8 +5128,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {        !Subtarget->is64Bit()) {      Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),                           DAG.getNode(X86ISD::GlobalBaseReg, -                                     DebugLoc::getUnknownLoc(), -                                     getPointerTy()), +                                     DebugLoc(), getPointerTy()),                           Result);    } @@ -5251,8 +5250,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,    DebugLoc dl = GA->getDebugLoc();  // ? 
function entry point might be better    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,                                       DAG.getNode(X86ISD::GlobalBaseReg, -                                                 DebugLoc::getUnknownLoc(), -                                                 PtrVT), InFlag); +                                                 DebugLoc(), PtrVT), InFlag);    InFlag = Chain.getValue(1);    return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); @@ -5274,7 +5272,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,    DebugLoc dl = GA->getDebugLoc();    // Get the Thread Pointer    SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress, -                             DebugLoc::getUnknownLoc(), PtrVT, +                             DebugLoc(), PtrVT,                               DAG.getRegister(is64Bit? X86::FS : X86::GS,                                               MVT::i32)); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 4549cba6f397..2c2a5fbb8032 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -417,15 +417,19 @@ namespace llvm {      virtual unsigned getByValTypeAlignment(const Type *Ty) const;      /// getOptimalMemOpType - Returns the target specific optimal type for load -    /// and store operations as a result of memset, memcpy, and memmove lowering. -    /// If DstAlign is zero that means it's safe to destination alignment can -    /// satisfy any constraint. Similarly if SrcAlign is zero it means there -    /// isn't a need to check it against alignment requirement, probably because -    /// the source does not need to be loaded. It returns EVT::Other if -    /// SelectionDAG should be responsible for determining it. -    virtual EVT getOptimalMemOpType(uint64_t Size, -                                    unsigned DstAlign, unsigned SrcAlign, -                                    bool SafeToUseFP, SelectionDAG &DAG) const; +    /// and store operations as a result of memset, memcpy, and memmove +    /// lowering. If DstAlign is zero that means it's safe to destination +    /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +    /// means there isn't a need to check it against alignment requirement, +    /// probably because the source does not need to be loaded. If +    /// 'NonScalarIntSafe' is true, that means it's safe to return a +    /// non-scalar-integer type, e.g. empty string source, constant, or loaded +    /// from memory. It returns EVT::Other if SelectionDAG should be responsible +    /// for determining it. +    virtual EVT +    getOptimalMemOpType(uint64_t Size, +                        unsigned DstAlign, unsigned SrcAlign, +                        bool NonScalarIntSafe, SelectionDAG &DAG) const;      /// allowsUnalignedMemoryAccesses - Returns true if the target allows      /// unaligned memory accesses. of the specified type. diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index d25ec260491e..cbe4c8256299 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -311,6 +311,21 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,        : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,          Requires<[HasSSE42]>; +// AES Instruction Templates: +// +// AES8I +// FIXME: Verify these, they appear to use the same encoding as the SSE4.2 T8 +// and TA encodings. 
+class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm, +            list<dag>pattern> +      : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, +        Requires<[HasAES]>; + +class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, +            list<dag> pattern> +      : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, +        Requires<[HasAES]>; +  // X86-64 Instruction templates...  // diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index c0c9d98ffea8..fcb9947423de 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1803,7 +1803,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,                             MachineBasicBlock *FBB,                             const SmallVectorImpl<MachineOperand> &Cond) const {    // FIXME this should probably have a DebugLoc operand -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    // Shouldn't be a fall through.    assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 1 || Cond.size() == 0) && @@ -2107,7 +2107,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {    bool isAligned = (*MMOBegin)->getAlignment() >= 16;    unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));    for (unsigned i = 0, e = Addr.size(); i != e; ++i)      MIB.addOperand(Addr[i]); @@ -2202,7 +2202,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {    bool isAligned = (*MMOBegin)->getAlignment() >= 16;    unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);    for (unsigned i = 0, e = Addr.size(); i != e; ++i)      MIB.addOperand(Addr[i]); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8fccc8a37aca..65b7ec023d81 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -330,6 +330,7 @@ def OptForSize   : Predicate<"OptForSize">;  def OptForSpeed  : Predicate<"!OptForSize">;  def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;  def CallImmAddr  : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; +def HasAES       : Predicate<"Subtarget->hasAES()">;  //===----------------------------------------------------------------------===//  // X86 Instruction Format Definitions. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dadc2a663b57..11f7e27c4fc3 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3848,53 +3848,6 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),  def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),            (PCMPGTQrm VR128:$src1, addr:$src2)>; -// TODO: These should be AES as a feature set. 
-defm AESIMC          : SS42I_binop_rm_int<0xDB, "aesimc", -                       int_x86_aesni_aesimc>; -defm AESENC          : SS42I_binop_rm_int<0xDC, "aesenc", -                       int_x86_aesni_aesenc>; -defm AESENCLAST      : SS42I_binop_rm_int<0xDD, "aesenclast", -                       int_x86_aesni_aesenclast>; -defm AESDEC          : SS42I_binop_rm_int<0xDE, "aesdec", -                       int_x86_aesni_aesdec>; -defm AESDECLAST      : SS42I_binop_rm_int<0xDF, "aesdeclast", -                       int_x86_aesni_aesdeclast>; - -def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)), -          (AESIMCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))), -          (AESIMCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), -          (AESENCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), -          (AESENCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), -          (AESENCLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), -          (AESENCLASTrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), -          (AESDECrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), -          (AESDECrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), -          (AESDECLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), -          (AESDECLASTrm VR128:$src1, addr:$src2)>; - -def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst), -  (ins VR128:$src1, i32i8imm:$src2), -  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", -  [(set VR128:$dst, -    (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, -  OpSize; -def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst), -  (ins i128mem:$src1, i32i8imm:$src2), -  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", -  [(set VR128:$dst, -    (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), -                                    imm:$src2))]>, -  OpSize; -  // crc intrinsic instruction  // This set of instructions are only rm, the only difference is the size  // of r and m. 
@@ -4056,3 +4009,81 @@ defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;  defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;  defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;  defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; + +//===----------------------------------------------------------------------===// +// AES-NI Instructions +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in { +  multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, +                                Intrinsic IntId128, bit Commutable = 0> { +    def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), +                   (ins VR128:$src1, VR128:$src2), +                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), +                   [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, +                   OpSize { +      let isCommutable = Commutable; +    } +    def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), +                   (ins VR128:$src1, i128mem:$src2), +                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), +                   [(set VR128:$dst, +                     (IntId128 VR128:$src1, +                      (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; +  } +} + +defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc", +                       int_x86_aesni_aesenc>; +defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast", +                       int_x86_aesni_aesenclast>; +defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec", +                       int_x86_aesni_aesdec>; +defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast", +                       int_x86_aesni_aesdeclast>; + +def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), +          (AESENCrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), +          (AESENCrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), +          (AESENCLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), +          (AESENCLASTrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), +          (AESDECrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), +          (AESDECrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), +          (AESDECLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), +          (AESDECLASTrm VR128:$src1, addr:$src2)>; + +def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), +  (ins VR128:$src1), +  "aesimc\t{$src1, $dst|$dst, $src1}", +  [(set VR128:$dst, +    (int_x86_aesni_aesimc VR128:$src1))]>, +  OpSize; + +def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), +  (ins i128mem:$src1), +  "aesimc\t{$src1, $dst|$dst, $src1}", +  [(set VR128:$dst, +    (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, +  OpSize; + +def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), +  (ins VR128:$src1, i32i8imm:$src2), +  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", +  [(set VR128:$dst, +    (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, +  OpSize; +def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), +  (ins i128mem:$src1, 
i32i8imm:$src2), +  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", +  [(set VR128:$dst, +    (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), +                                    imm:$src2))]>, +  OpSize; + diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 8a0cde49aea6..09a26858eb7e 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -259,6 +259,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {    HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);    HasAVX = ((ECX >> 28) & 0x1); +  HasAES = IsIntel && ((ECX >> 25) & 0x1);    if (IsIntel || IsAMD) {      // Determine if bit test memory instructions are slow. @@ -286,6 +287,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,    , HasX86_64(false)    , HasSSE4A(false)    , HasAVX(false) +  , HasAES(false)    , HasFMA3(false)    , HasFMA4(false)    , IsBTMemSlow(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index bf30154625bf..8a873f04df4e 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -69,6 +69,9 @@ protected:    /// HasAVX - Target has AVX instructions    bool HasAVX; +  /// HasAES - Target has AES instructions +  bool HasAES; +    /// HasFMA3 - Target has 3-operand fused multiply-add    bool HasFMA3; @@ -148,6 +151,7 @@ public:    bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }    bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }    bool hasAVX() const { return HasAVX; } +  bool hasAES() const { return HasAES; }    bool hasFMA3() const { return HasFMA3; }    bool hasFMA4() const { return HasFMA4; }    bool isBTMemSlow() const { return IsBTMemSlow; } diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 54df33c50647..ae3f16c2dfe4 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -301,7 +301,7 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,                               MachineBasicBlock *FBB,                               const SmallVectorImpl<MachineOperand> &Cond)const{    // FIXME there should probably be a DebugLoc argument here -  DebugLoc dl = DebugLoc::getUnknownLoc(); +  DebugLoc dl;    // Shouldn't be a fall through.    
assert(TBB && "InsertBranch must not be told to insert a fallthrough");    assert((Cond.size() == 2 || Cond.size() == 0) && @@ -362,7 +362,7 @@ bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,                                    unsigned DestReg, unsigned SrcReg,                                    const TargetRegisterClass *DestRC,                                    const TargetRegisterClass *SrcRC) const { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    if (DestRC == SrcRC) { @@ -397,7 +397,7 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,                                           int FrameIndex,                                           const TargetRegisterClass *RC) const  { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    BuildMI(MBB, I, DL, get(XCore::STWFI))      .addReg(SrcReg, getKillRegState(isKill)) @@ -410,7 +410,7 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,                                            unsigned DestReg, int FrameIndex,                                            const TargetRegisterClass *RC) const  { -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (I != MBB.end()) DL = I->getDebugLoc();    BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)      .addFrameIndex(FrameIndex) @@ -431,7 +431,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,    bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF); -  DebugLoc DL = DebugLoc::getUnknownLoc(); +  DebugLoc DL;    if (MI != MBB.end()) DL = MI->getDebugLoc();    for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 88925047ccd7..1631e7399922 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -414,8 +414,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {    MachineFrameInfo *MFI = MF.getFrameInfo();    MachineModuleInfo *MMI = MFI->getMachineModuleInfo();    XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); -  DebugLoc dl = (MBBI != MBB.end() ? -                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); +  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();    bool FP = hasFP(MF); diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 3d319320862d..47abb7dfd812 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -156,6 +156,8 @@ bool InternalizePass::runOnModule(Module &M) {    for (Module::global_iterator I = M.global_begin(), E = M.global_end();         I != E; ++I)      if (!I->isDeclaration() && !I->hasLocalLinkage() && +        // Available externally is really just a "declaration with a body". +        !I->hasAvailableExternallyLinkage() &&          !ExternalNames.count(I->getName())) {        I->setLinkage(GlobalValue::InternalLinkage);        Changed = true; @@ -167,6 +169,8 @@ bool InternalizePass::runOnModule(Module &M) {    for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();         I != E; ++I)      if (!I->isDeclaration() && !I->hasInternalLinkage() && +        // Available externally is really just a "declaration with a body". 
+        !I->hasAvailableExternallyLinkage() &&          !ExternalNames.count(I->getName())) {        I->setLinkage(GlobalValue::InternalLinkage);        Changed = true; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 988a4cb3f2a2..6605666e45d1 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -510,6 +510,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {      // Now expand it into actual Instructions and patch it into place.      Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); +    // Inform ScalarEvolution that this value is changing. The change doesn't +    // affect its value, but it does potentially affect which use lists the +    // value will be on after the replacement, which affects ScalarEvolution's +    // ability to walk use lists and drop dangling pointers when a value is +    // deleted. +    SE->forgetValue(User); +      // Patch the new value into place.      if (Op->hasName())        NewVal->takeName(Op); @@ -616,36 +623,18 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {    }  } -/// Return true if it is OK to use SIToFPInst for an induction variable -/// with given initial and exit values. -static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV, -                          uint64_t intIV, uint64_t intEV) { - -  if (InitV.getValueAPF().isNegative() || ExitV.getValueAPF().isNegative()) -    return true; - -  // If the iteration range can be handled by SIToFPInst then use it. -  APInt Max = APInt::getSignedMaxValue(32); -  if (Max.getZExtValue() > static_cast<uint64_t>(abs64(intEV - intIV))) -    return true; - -  return false; -} - -/// convertToInt - Convert APF to an integer, if possible. -static bool convertToInt(const APFloat &APF, uint64_t *intVal) { - +/// ConvertToSInt - Convert APF to an integer, if possible. +static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {    bool isExact = false;    if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)      return false; -  if (APF.convertToInteger(intVal, 32, APF.isNegative(), -                           APFloat::rmTowardZero, &isExact) -      != APFloat::opOK) -    return false; -  if (!isExact) +  // See if we can convert this to an int64_t +  uint64_t UIntVal; +  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, +                           &isExact) != APFloat::opOK || !isExact)      return false; +  IntVal = UIntVal;    return true; -  }  /// HandleFloatingPointIV - If the loop has floating induction variable @@ -657,144 +646,200 @@ static bool convertToInt(const APFloat &APF, uint64_t *intVal) {  /// for(int i = 0; i < 10000; ++i)  ///   bar((double)i);  /// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { - -  unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0)); +void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { +  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));    unsigned BackEdge     = IncomingEdge^1;    // Check incoming value. 
-  ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge)); -  if (!InitValue) return; -  uint64_t newInitValue = -              Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); -  if (!convertToInt(InitValue->getValueAPF(), &newInitValue)) +  ConstantFP *InitValueVal = +    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); + +  int64_t InitValue; +  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))      return; -  // Check IV increment. Reject this PH if increment operation is not +  // Check IV increment. Reject this PN if increment operation is not    // an add or increment value can not be represented by an integer.    BinaryOperator *Incr = -    dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge)); -  if (!Incr) return; -  if (Incr->getOpcode() != Instruction::FAdd) return; -  ConstantFP *IncrValue = NULL; -  unsigned IncrVIndex = 1; -  if (Incr->getOperand(1) == PH) -    IncrVIndex = 0; -  IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex)); -  if (!IncrValue) return; -  uint64_t newIncrValue = -              Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); -  if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue)) +    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); +  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; +   +  // If this is not an add of the PHI with a constantfp, or if the constant fp +  // is not an integer, bail out. +  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); +  int64_t IncValue; +  if (IncValueVal == 0 || Incr->getOperand(0) != PN || +      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))      return; -  // Check Incr uses. One user is PH and the other users is exit condition used -  // by the conditional terminator. +  // Check Incr uses. One user is PN and the other user is an exit condition +  // used by the conditional terminator.    Value::use_iterator IncrUse = Incr->use_begin();    Instruction *U1 = cast<Instruction>(IncrUse++);    if (IncrUse == Incr->use_end()) return;    Instruction *U2 = cast<Instruction>(IncrUse++);    if (IncrUse != Incr->use_end()) return; -  // Find exit condition. -  FCmpInst *EC = dyn_cast<FCmpInst>(U1); -  if (!EC) -    EC = dyn_cast<FCmpInst>(U2); -  if (!EC) return; - -  if (BranchInst *BI = dyn_cast<BranchInst>(EC->getParent()->getTerminator())) { -    if (!BI->isConditional()) return; -    if (BI->getCondition() != EC) return; -  } - -  // Find exit value. If exit value can not be represented as an integer then -  // do not handle this floating point PH. -  ConstantFP *EV = NULL; -  unsigned EVIndex = 1; -  if (EC->getOperand(1) == Incr) -    EVIndex = 0; -  EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex)); -  if (!EV) return; -  uint64_t intEV = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits(); -  if (!convertToInt(EV->getValueAPF(), &intEV)) +  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't +  // only used by a branch, we can't transform it. +  FCmpInst *Compare = dyn_cast<FCmpInst>(U1); +  if (!Compare) +    Compare = dyn_cast<FCmpInst>(U2); +  if (Compare == 0 || !Compare->hasOneUse() || +      !isa<BranchInst>(Compare->use_back()))      return; - +   +  BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); + +  // We need to verify that the branch actually controls the iteration count +  // of the loop.  If not, the new IV can overflow and no one will notice. 
+  // The branch block must be in the loop and one of the successors must be out +  // of the loop. +  assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); +  if (!L->contains(TheBr->getParent()) || +      (L->contains(TheBr->getSuccessor(0)) && +       L->contains(TheBr->getSuccessor(1)))) +    return; +   +   +  // If it isn't a comparison with an integer-as-fp (the exit value), we can't +  // transform it. +  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); +  int64_t ExitValue; +  if (ExitValueVal == 0 || +      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) +    return; +      // Find new predicate for integer comparison.    CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; -  switch (EC->getPredicate()) { +  switch (Compare->getPredicate()) { +  default: return;  // Unknown comparison.    case CmpInst::FCMP_OEQ: -  case CmpInst::FCMP_UEQ: -    NewPred = CmpInst::ICMP_EQ; -    break; +  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; +  case CmpInst::FCMP_ONE: +  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;    case CmpInst::FCMP_OGT: -  case CmpInst::FCMP_UGT: -    NewPred = CmpInst::ICMP_UGT; -    break; +  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;    case CmpInst::FCMP_OGE: -  case CmpInst::FCMP_UGE: -    NewPred = CmpInst::ICMP_UGE; -    break; +  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;    case CmpInst::FCMP_OLT: -  case CmpInst::FCMP_ULT: -    NewPred = CmpInst::ICMP_ULT; -    break; +  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;    case CmpInst::FCMP_OLE: -  case CmpInst::FCMP_ULE: -    NewPred = CmpInst::ICMP_ULE; -    break; -  default: -    break; +  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;    } -  if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return; +   +  // We convert the floating point induction variable to a signed i32 value if +  // we can.  This is only safe if the comparison will not overflow in a way +  // that won't be trapped by the integer equivalent operations.  Check for this +  // now. +  // TODO: We could use i64 if it is native and the range requires it. +   +  // The start/stride/exit values must all fit in signed i32. +  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) +    return; + +  // If not actually striding (add x, 0.0), avoid touching the code. +  if (IncValue == 0) +    return; + +  // Positive and negative strides have different safety conditions. +  if (IncValue > 0) { +    // If we have a positive stride, we require the init to be less than the +    // exit value and an equality or less than comparison. +    if (InitValue >= ExitValue || +        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) +      return; +     +    uint32_t Range = uint32_t(ExitValue-InitValue); +    if (NewPred == CmpInst::ICMP_SLE) { +      // Normalize SLE -> SLT, check for infinite loop. +      if (++Range == 0) return;  // Range overflows. +    } +     +    unsigned Leftover = Range % uint32_t(IncValue); +     +    // If this is an equality comparison, we require that the strided value +    // exactly land on the exit value, otherwise the IV condition will wrap +    // around and do things the fp IV wouldn't. +    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && +        Leftover != 0) +      return; +     +    // If the stride would wrap around the i32 before exiting, we can't +    // transform the IV. 
+    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) +      return; +     +  } else { +    // If we have a negative stride, we require the init to be greater than the +    // exit value and an equality or greater than comparison. +    if (InitValue >= ExitValue || +        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) +      return; +     +    uint32_t Range = uint32_t(InitValue-ExitValue); +    if (NewPred == CmpInst::ICMP_SGE) { +      // Normalize SGE -> SGT, check for infinite loop. +      if (++Range == 0) return;  // Range overflows. +    } +     +    unsigned Leftover = Range % uint32_t(-IncValue); +     +    // If this is an equality comparison, we require that the strided value +    // exactly land on the exit value, otherwise the IV condition will wrap +    // around and do things the fp IV wouldn't. +    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && +        Leftover != 0) +      return; +     +    // If the stride would wrap around the i32 before exiting, we can't +    // transform the IV. +    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) +      return; +  } +   +  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());    // Insert new integer induction variable. -  PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()), -                                    PH->getName()+".int", PH); -  NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()), -                                       newInitValue), -                      PH->getIncomingBlock(IncomingEdge)); - -  Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, -                           ConstantInt::get(Type::getInt32Ty(PH->getContext()), -                                                             newIncrValue), -                                            Incr->getName()+".int", Incr); -  NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge)); - -  // The back edge is edge 1 of newPHI, whatever it may have been in the -  // original PHI. -  ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()), -                                        intEV); -  Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV); -  Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1)); -  ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(), -                                 NewPred, LHS, RHS, EC->getName()); - -  // In the following deletions, PH may become dead and may be deleted. +  PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); +  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), +                      PN->getIncomingBlock(IncomingEdge)); + +  Value *NewAdd = +    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), +                              Incr->getName()+".int", Incr); +  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); + +  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, +                                      ConstantInt::get(Int32Ty, ExitValue), +                                      Compare->getName()); + +  // In the following deletions, PN may become dead and may be deleted.    // Use a WeakVH to observe whether this happens. -  WeakVH WeakPH = PH; +  WeakVH WeakPH = PN; -  // Delete old, floating point, exit comparison instruction. -  NewEC->takeName(EC); -  EC->replaceAllUsesWith(NewEC); -  RecursivelyDeleteTriviallyDeadInstructions(EC); +  // Delete the old floating point exit comparison.  
The branch starts using the +  // new comparison. +  NewCompare->takeName(Compare); +  Compare->replaceAllUsesWith(NewCompare); +  RecursivelyDeleteTriviallyDeadInstructions(Compare); -  // Delete old, floating point, increment instruction. +  // Delete the old floating point increment.    Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));    RecursivelyDeleteTriviallyDeadInstructions(Incr); -  // Replace floating induction variable, if it isn't already deleted. -  // Give SIToFPInst preference over UIToFPInst because it is faster on -  // platforms that are widely used. -  if (WeakPH && !PH->use_empty()) { -    if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV)) { -      SIToFPInst *Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv", -                                        PH->getParent()->getFirstNonPHI()); -      PH->replaceAllUsesWith(Conv); -    } else { -      UIToFPInst *Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv", -                                        PH->getParent()->getFirstNonPHI()); -      PH->replaceAllUsesWith(Conv); -    } -    RecursivelyDeleteTriviallyDeadInstructions(PH); +  // If the FP induction variable still has uses, this is because something else +  // in the loop uses its value.  In order to canonicalize the induction +  // variable, we chose to eliminate the IV and rewrite it in terms of an +  // int->fp cast. +  // +  // We give preference to sitofp over uitofp because it is faster on most +  // platforms. +  if (WeakPH) { +    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", +                                 PN->getParent()->getFirstNonPHI()); +    PN->replaceAllUsesWith(Conv); +    RecursivelyDeleteTriviallyDeadInstructions(PN);    }    // Add a new IVUsers entry for the newly-created integer PHI. diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index e3b809e35d4c..27fd2ef5a686 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -415,46 +415,44 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {    Function *F = loopHeader->getParent(); -  // If the condition is trivial, always unswitch.  There is no code growth for -  // this case. -  if (!IsTrivialUnswitchCondition(LoopCond)) { -    // Check to see if it would be profitable to unswitch current loop. +  Constant *CondVal = 0; +  BasicBlock *ExitBlock = 0; +  if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { +    // If the condition is trivial, always unswitch. There is no code growth +    // for this case. +    UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); +    return true; +  } -    // Do not do non-trivial unswitch while optimizing for size. -    if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) -      return false; +  // Check to see if it would be profitable to unswitch current loop. -    // FIXME: This is overly conservative because it does not take into -    // consideration code simplification opportunities and code that can -    // be shared by the resultant unswitched loops. 
-    CodeMetrics Metrics; -    for (Loop::block_iterator I = currentLoop->block_begin(),  -           E = currentLoop->block_end(); -         I != E; ++I) -      Metrics.analyzeBasicBlock(*I); - -    // Limit the number of instructions to avoid causing significant code -    // expansion, and the number of basic blocks, to avoid loops with -    // large numbers of branches which cause loop unswitching to go crazy. -    // This is a very ad-hoc heuristic. -    if (Metrics.NumInsts > Threshold || -        Metrics.NumBlocks * 5 > Threshold || -        Metrics.NeverInline) { -      DEBUG(dbgs() << "NOT unswitching loop %" -            << currentLoop->getHeader()->getName() << ", cost too high: " -            << currentLoop->getBlocks().size() << "\n"); -      return false; -    } -  } +  // Do not do non-trivial unswitch while optimizing for size. +  if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) +    return false; -  Constant *CondVal; -  BasicBlock *ExitBlock; -  if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { -    UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock); -  } else { -    UnswitchNontrivialCondition(LoopCond, Val, currentLoop); +  // FIXME: This is overly conservative because it does not take into +  // consideration code simplification opportunities and code that can +  // be shared by the resultant unswitched loops. +  CodeMetrics Metrics; +  for (Loop::block_iterator I = currentLoop->block_begin(),  +         E = currentLoop->block_end(); +       I != E; ++I) +    Metrics.analyzeBasicBlock(*I); + +  // Limit the number of instructions to avoid causing significant code +  // expansion, and the number of basic blocks, to avoid loops with +  // large numbers of branches which cause loop unswitching to go crazy. +  // This is a very ad-hoc heuristic. +  if (Metrics.NumInsts > Threshold || +      Metrics.NumBlocks * 5 > Threshold || +      Metrics.NeverInline) { +    DEBUG(dbgs() << "NOT unswitching loop %" +          << currentLoop->getHeader()->getName() << ", cost too high: " +          << currentLoop->getBlocks().size() << "\n"); +    return false;    } +  UnswitchNontrivialCondition(LoopCond, Val, currentLoop);    return true;  } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 292332e4f8a6..a31235a1f5cb 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -14,82 +14,31 @@  #include "llvm/Transforms/Utils/SSAUpdater.h"  #include "llvm/Instructions.h"  #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/AlignOf.h" -#include "llvm/Support/Allocator.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h"  #include "llvm/Support/raw_ostream.h"  using namespace llvm; -/// BBInfo - Per-basic block information used internally by SSAUpdater. -/// The predecessors of each block are cached here since pred_iterator is -/// slow and we need to iterate over the blocks at least a few times. -class SSAUpdater::BBInfo { -public: -  Value *AvailableVal; // Value to use in this block. -  BasicBlock *DefBB;   // Block that defines the available value. -  unsigned NumPreds;   // Number of predecessor blocks. -  BasicBlock **Preds;  // Array[NumPreds] of predecessor blocks. -  unsigned Counter;    // Marker to identify blocks already visited. -  PHINode *PHITag;     // Marker for existing PHIs that match. 
- -  BBInfo(BasicBlock *BB, Value *V, BumpPtrAllocator *Allocator); -}; -typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy; - -SSAUpdater::BBInfo::BBInfo(BasicBlock *BB, Value *V, -                           BumpPtrAllocator *Allocator) -  : AvailableVal(V), DefBB(0), NumPreds(0), Preds(0), Counter(0), PHITag(0) { -  // If this block has a known value, don't bother finding its predecessors. -  if (V) { -    DefBB = BB; -    return; -  } - -  // We can get our predecessor info by walking the pred_iterator list, but it -  // is relatively slow.  If we already have PHI nodes in this block, walk one -  // of them to get the predecessor list instead. -  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { -    NumPreds = SomePhi->getNumIncomingValues(); -    Preds = static_cast<BasicBlock**> -      (Allocator->Allocate(NumPreds * sizeof(BasicBlock*), -                           AlignOf<BasicBlock*>::Alignment)); -    for (unsigned pi = 0; pi != NumPreds; ++pi) -      Preds[pi] = SomePhi->getIncomingBlock(pi); -    return; -  } - -  // Stash the predecessors in a temporary vector until we know how much space -  // to allocate for them. -  SmallVector<BasicBlock*, 10> TmpPreds; -  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { -    TmpPreds.push_back(*PI); -    ++NumPreds; -  } -  Preds = static_cast<BasicBlock**> -    (Allocator->Allocate(NumPreds * sizeof(BasicBlock*), -                         AlignOf<BasicBlock*>::Alignment)); -  memcpy(Preds, TmpPreds.data(), NumPreds * sizeof(BasicBlock*)); -} +typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy; +typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > +                IncomingPredInfoTy; -typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;  static AvailableValsTy &getAvailableVals(void *AV) {    return *static_cast<AvailableValsTy*>(AV);  } -static BBMapTy *getBBMap(void *BM) { -  return static_cast<BBMapTy*>(BM); +static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { +  return *static_cast<IncomingPredInfoTy*>(IPI);  } -static BumpPtrAllocator *getAllocator(void *BPA) { -  return static_cast<BumpPtrAllocator*>(BPA); -}  SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) -  : AV(0), PrototypeValue(0), BM(0), BPA(0), InsertedPHIs(NewPHI) {} +  : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {}  SSAUpdater::~SSAUpdater() {    delete &getAvailableVals(AV); +  delete &getIncomingPredInfo(IPI);  }  /// Initialize - Reset this object to get ready for a new set of SSA @@ -99,6 +48,11 @@ void SSAUpdater::Initialize(Value *ProtoValue) {      AV = new AvailableValsTy();    else      getAvailableVals(AV).clear(); + +  if (IPI == 0) +    IPI = new IncomingPredInfoTy(); +  else +    getIncomingPredInfo(IPI).clear();    PrototypeValue = ProtoValue;  } @@ -119,7 +73,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {  /// IsEquivalentPHI - Check if PHI has the same incoming value as specified  /// in ValueMapping for each predecessor block. -static bool IsEquivalentPHI(PHINode *PHI, +static bool IsEquivalentPHI(PHINode *PHI,                               DenseMap<BasicBlock*, Value*> &ValueMapping) {    unsigned PHINumValues = PHI->getNumIncomingValues();    if (PHINumValues != ValueMapping.size()) @@ -135,12 +89,38 @@ static bool IsEquivalentPHI(PHINode *PHI,    return true;  } +/// GetExistingPHI - Check if BB already contains a phi node that is equivalent +/// to the specified mapping from predecessor blocks to incoming values. 
+static Value *GetExistingPHI(BasicBlock *BB, +                             DenseMap<BasicBlock*, Value*> &ValueMapping) { +  PHINode *SomePHI; +  for (BasicBlock::iterator It = BB->begin(); +       (SomePHI = dyn_cast<PHINode>(It)); ++It) { +    if (IsEquivalentPHI(SomePHI, ValueMapping)) +      return SomePHI; +  } +  return 0; +} + +/// GetExistingPHI - Check if BB already contains an equivalent phi node. +/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*> +/// objects that specify the mapping from predecessor blocks to incoming values. +template<typename InputIt> +static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I, +                             const InputIt &E) { +  // Avoid create the mapping if BB has no phi nodes at all. +  if (!isa<PHINode>(BB->begin())) +    return 0; +  DenseMap<BasicBlock*, Value*> ValueMapping(I, E); +  return GetExistingPHI(BB, ValueMapping); +} +  /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is  /// live at the end of the specified block.  Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { -  assert(BM == 0 && BPA == 0 && "Unexpected Internal State"); +  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");    Value *Res = GetValueAtEndOfBlockInternal(BB); -  assert(BM == 0 && BPA == 0 && "Unexpected Internal State"); +  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");    return Res;  } @@ -166,7 +146,7 @@ Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {  Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {    // If there is no definition of the renamed variable in this block, just use    // GetValueAtEndOfBlock to do our work. -  if (!HasValueForBlock(BB)) +  if (!getAvailableVals(AV).count(BB))      return GetValueAtEndOfBlock(BB);    // Otherwise, we have the hard case.  Get the live-in values for each @@ -213,18 +193,10 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {    if (SingularValue != 0)      return SingularValue; -  // Otherwise, we do need a PHI: check to see if we already have one available -  // in this block that produces the right value. -  if (isa<PHINode>(BB->begin())) { -    DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(), -                                               PredValues.end()); -    PHINode *SomePHI; -    for (BasicBlock::iterator It = BB->begin(); -         (SomePHI = dyn_cast<PHINode>(It)); ++It) { -      if (IsEquivalentPHI(SomePHI, ValueMapping)) -        return SomePHI; -    } -  } +  // Otherwise, we do need a PHI. +  if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(), +                                          PredValues.end())) +    return ExistingPHI;    // Ok, we have no way out, insert a new one now.    PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), @@ -254,7 +226,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {  /// which use their value in the corresponding predecessor.  void SSAUpdater::RewriteUse(Use &U) {    Instruction *User = cast<Instruction>(U.getUser()); - +      Value *V;    if (PHINode *UserPN = dyn_cast<PHINode>(User))      V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U)); @@ -264,264 +236,161 @@ void SSAUpdater::RewriteUse(Use &U) {    U.set(V);  } +  /// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry  /// for the specified BB and if so, return it.  
If not, construct SSA form by -/// first calculating the required placement of PHIs and then inserting new -/// PHIs where needed. +/// walking predecessors inserting PHI nodes as needed until we get to a block +/// where the value is available. +///  Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {    AvailableValsTy &AvailableVals = getAvailableVals(AV); -  if (Value *V = AvailableVals[BB]) -    return V; - -  // Pool allocation used internally by GetValueAtEndOfBlock. -  BumpPtrAllocator AllocatorObj; -  BBMapTy BBMapObj; -  BPA = &AllocatorObj; -  BM = &BBMapObj; - -  BBInfo *Info = new (AllocatorObj) BBInfo(BB, 0, &AllocatorObj); -  BBMapObj[BB] = Info; - -  bool Changed; -  unsigned Counter = 1; -  do { -    Changed = false; -    FindPHIPlacement(BB, Info, Changed, Counter); -    ++Counter; -  } while (Changed); - -  FindAvailableVal(BB, Info, Counter); - -  BPA = 0; -  BM = 0; -  return Info->AvailableVal; -} -/// FindPHIPlacement - Recursively visit the predecessors of a block to find -/// the reaching definition for each predecessor and then determine whether -/// a PHI is needed in this block. -void SSAUpdater::FindPHIPlacement(BasicBlock *BB, BBInfo *Info, bool &Changed, -                                  unsigned Counter) { -  AvailableValsTy &AvailableVals = getAvailableVals(AV); -  BBMapTy *BBMap = getBBMap(BM); -  BumpPtrAllocator *Allocator = getAllocator(BPA); -  bool BBNeedsPHI = false; -  BasicBlock *SamePredDefBB = 0; - -  // If there are no predecessors, then we must have found an unreachable -  // block.  Treat it as a definition with 'undef'. -  if (Info->NumPreds == 0) { -    Info->AvailableVal = UndefValue::get(PrototypeValue->getType()); -    Info->DefBB = BB; -    return; +  // Query AvailableVals by doing an insertion of null. +  std::pair<AvailableValsTy::iterator, bool> InsertRes = +    AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>())); + +  // Handle the case when the insertion fails because we have already seen BB. +  if (!InsertRes.second) { +    // If the insertion failed, there are two cases.  The first case is that the +    // value is already available for the specified block.  If we get this, just +    // return the value. +    if (InsertRes.first->second != 0) +      return InsertRes.first->second; + +    // Otherwise, if the value we find is null, then this is the value is not +    // known but it is being computed elsewhere in our recursion.  This means +    // that we have a cycle.  Handle this by inserting a PHI node and returning +    // it.  When we get back to the first instance of the recursion we will fill +    // in the PHI node. +    return InsertRes.first->second = +      PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), +                      &BB->front());    } -  Info->Counter = Counter; -  for (unsigned pi = 0; pi != Info->NumPreds; ++pi) { -    BasicBlock *Pred = Info->Preds[pi]; -    BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred); -    if (!BBMapBucket.second) { -      Value *PredVal = AvailableVals.lookup(Pred); -      BBMapBucket.second = new (*Allocator) BBInfo(Pred, PredVal, Allocator); -    } -    BBInfo *PredInfo = BBMapBucket.second; -    BasicBlock *DefBB = 0; -    if (!PredInfo->AvailableVal) { -      if (PredInfo->Counter != Counter) -        FindPHIPlacement(Pred, PredInfo, Changed, Counter); - -      // Ignore back edges where the value is not yet known. 
-      if (!PredInfo->DefBB) -        continue; +  // Okay, the value isn't in the map and we just inserted a null in the entry +  // to indicate that we're processing the block.  Since we have no idea what +  // value is in this block, we have to recurse through our predecessors. +  // +  // While we're walking our predecessors, we keep track of them in a vector, +  // then insert a PHI node in the end if we actually need one.  We could use a +  // smallvector here, but that would take a lot of stack space for every level +  // of the recursion, just use IncomingPredInfo as an explicit stack. +  IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); +  unsigned FirstPredInfoEntry = IncomingPredInfo.size(); + +  // As we're walking the predecessors, keep track of whether they are all +  // producing the same value.  If so, this value will capture it, if not, it +  // will get reset to null.  We distinguish the no-predecessor case explicitly +  // below. +  TrackingVH<Value> ExistingValue; + +  // We can get our predecessor info by walking the pred_iterator list, but it +  // is relatively slow.  If we already have PHI nodes in this block, walk one +  // of them to get the predecessor list instead. +  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { +    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { +      BasicBlock *PredBB = SomePhi->getIncomingBlock(i); +      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); +      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); + +      // Set ExistingValue to singular value from all predecessors so far. +      if (i == 0) +        ExistingValue = PredVal; +      else if (PredVal != ExistingValue) +        ExistingValue = 0;      } -    DefBB = PredInfo->DefBB; +  } else { +    bool isFirstPred = true; +    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { +      BasicBlock *PredBB = *PI; +      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); +      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); -    if (!SamePredDefBB) -      SamePredDefBB = DefBB; -    else if (DefBB != SamePredDefBB) -      BBNeedsPHI = true; +      // Set ExistingValue to singular value from all predecessors so far. +      if (isFirstPred) { +        ExistingValue = PredVal; +        isFirstPred = false; +      } else if (PredVal != ExistingValue) +        ExistingValue = 0; +    }    } -  BasicBlock *NewDefBB = (BBNeedsPHI ? BB : SamePredDefBB); -  if (Info->DefBB != NewDefBB) { -    Changed = true; -    Info->DefBB = NewDefBB; -  } -} +  // If there are no predecessors, then we must have found an unreachable block +  // just return 'undef'.  Since there are no predecessors, InsertRes must not +  // be invalidated. +  if (IncomingPredInfo.size() == FirstPredInfoEntry) +    return InsertRes.first->second = UndefValue::get(PrototypeValue->getType()); + +  /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If +  /// this block is involved in a loop, a no-entry PHI node will have been +  /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted +  /// above. +  TrackingVH<Value> &InsertedVal = AvailableVals[BB]; + +  // If the predecessor values are not all the same, then check to see if there +  // is an existing PHI that can be used. 
+  if (!ExistingValue) +    ExistingValue = GetExistingPHI(BB, +                                   IncomingPredInfo.begin()+FirstPredInfoEntry, +                                   IncomingPredInfo.end()); + +  // If there is an existing value we can use, then we don't need to insert a +  // PHI.  This is the simple and common case. +  if (ExistingValue) { +    // If a PHI node got inserted, replace it with the existing value and delete +    // it. +    if (InsertedVal) { +      PHINode *OldVal = cast<PHINode>(InsertedVal); +      // Be careful about dead loops.  These RAUW's also update InsertedVal. +      if (InsertedVal != ExistingValue) +        OldVal->replaceAllUsesWith(ExistingValue); +      else +        OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); +      OldVal->eraseFromParent(); +    } else { +      InsertedVal = ExistingValue; +    } -/// FindAvailableVal - If this block requires a PHI, first check if an existing -/// PHI matches the PHI placement and reaching definitions computed earlier, -/// and if not, create a new PHI.  Visit all the block's predecessors to -/// calculate the available value for each one and fill in the incoming values -/// for a new PHI. -void SSAUpdater::FindAvailableVal(BasicBlock *BB, BBInfo *Info, -                                  unsigned Counter) { -  if (Info->AvailableVal || Info->Counter == Counter) -    return; +    // Either path through the 'if' should have set InsertedVal -> ExistingVal. +    assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) && +           "RAUW didn't change InsertedVal to be ExistingValue"); -  AvailableValsTy &AvailableVals = getAvailableVals(AV); -  BBMapTy *BBMap = getBBMap(BM); - -  // Check if there needs to be a PHI in BB. -  PHINode *NewPHI = 0; -  if (Info->DefBB == BB) { -    // Look for an existing PHI. -    FindExistingPHI(BB); -    if (!Info->AvailableVal) { -      NewPHI = PHINode::Create(PrototypeValue->getType(), -                               PrototypeValue->getName(), &BB->front()); -      NewPHI->reserveOperandSpace(Info->NumPreds); -      Info->AvailableVal = NewPHI; -      AvailableVals[BB] = NewPHI; -    } +    // Drop the entries we added in IncomingPredInfo to restore the stack. +    IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, +                           IncomingPredInfo.end()); +    return ExistingValue;    } -  // Iterate through the block's predecessors. -  Info->Counter = Counter; -  for (unsigned pi = 0; pi != Info->NumPreds; ++pi) { -    BasicBlock *Pred = Info->Preds[pi]; -    BBInfo *PredInfo = (*BBMap)[Pred]; -    FindAvailableVal(Pred, PredInfo, Counter); -    if (NewPHI) { -      // Skip to the nearest preceding definition. -      if (PredInfo->DefBB != Pred) -        PredInfo = (*BBMap)[PredInfo->DefBB]; -      NewPHI->addIncoming(PredInfo->AvailableVal, Pred); -    } else if (!Info->AvailableVal) -      Info->AvailableVal = PredInfo->AvailableVal; -  } +  // Otherwise, we do need a PHI: insert one now if we don't already have one. +  if (InsertedVal == 0) +    InsertedVal = PHINode::Create(PrototypeValue->getType(), +                                  PrototypeValue->getName(), &BB->front()); -  if (NewPHI) { -    DEBUG(dbgs() << "  Inserted PHI: " << *NewPHI << "\n"); +  PHINode *InsertedPHI = cast<PHINode>(InsertedVal); +  InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); -    // If the client wants to know about all new instructions, tell it. 
-    if (InsertedPHIs) InsertedPHIs->push_back(NewPHI); -  } -} +  // Fill in all the predecessors of the PHI. +  for (IncomingPredInfoTy::iterator I = +         IncomingPredInfo.begin()+FirstPredInfoEntry, +       E = IncomingPredInfo.end(); I != E; ++I) +    InsertedPHI->addIncoming(I->second, I->first); -/// FindExistingPHI - Look through the PHI nodes in a block to see if any of -/// them match what is needed. -void SSAUpdater::FindExistingPHI(BasicBlock *BB) { -  PHINode *SomePHI; -  for (BasicBlock::iterator It = BB->begin(); -       (SomePHI = dyn_cast<PHINode>(It)); ++It) { -    if (CheckIfPHIMatches(SomePHI)) { -      RecordMatchingPHI(SomePHI); -      break; -    } -    ClearPHITags(SomePHI); -  } -} +  // Drop the entries we added in IncomingPredInfo to restore the stack. +  IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, +                         IncomingPredInfo.end()); -/// CheckIfPHIMatches - Check if a PHI node matches the placement and values -/// in the BBMap. -bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) { -  BBMapTy *BBMap = getBBMap(BM); -  SmallVector<PHINode*, 20> WorkList; -  WorkList.push_back(PHI); - -  // Mark that the block containing this PHI has been visited. -  (*BBMap)[PHI->getParent()]->PHITag = PHI; - -  while (!WorkList.empty()) { -    PHI = WorkList.pop_back_val(); - -    // Iterate through the PHI's incoming values. -    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { -      Value *IncomingVal = PHI->getIncomingValue(i); -      BasicBlock *Pred = PHI->getIncomingBlock(i); -      BBInfo *PredInfo = (*BBMap)[Pred]; -      // Skip to the nearest preceding definition. -      if (PredInfo->DefBB != Pred) { -        Pred = PredInfo->DefBB; -        PredInfo = (*BBMap)[Pred]; -      } - -      // Check if it matches the expected value. -      if (PredInfo->AvailableVal) { -        if (IncomingVal == PredInfo->AvailableVal) -          continue; -        return false; -      } - -      // Check if the value is a PHI in the correct block. -      PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal); -      if (!IncomingPHIVal || IncomingPHIVal->getParent() != Pred) -        return false; - -      // If this block has already been visited, check if this PHI matches. -      if (PredInfo->PHITag) { -        if (IncomingPHIVal == PredInfo->PHITag) -          continue; -        return false; -      } -      PredInfo->PHITag = IncomingPHIVal; - -      WorkList.push_back(IncomingPHIVal); -    } -  } -  return true; -} +  // See if the PHI node can be merged to a single value.  This can happen in +  // loop cases when we get a PHI of itself and one other value. +  if (Value *ConstVal = InsertedPHI->hasConstantValue()) { +    InsertedPHI->replaceAllUsesWith(ConstVal); +    InsertedPHI->eraseFromParent(); +    InsertedVal = ConstVal; +  } else { +    DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n"); -/// RecordMatchingPHI - For a PHI node that matches, record it and its input -/// PHIs in both the BBMap and the AvailableVals mapping. -void SSAUpdater::RecordMatchingPHI(PHINode *PHI) { -  BBMapTy *BBMap = getBBMap(BM); -  AvailableValsTy &AvailableVals = getAvailableVals(AV); -  SmallVector<PHINode*, 20> WorkList; -  WorkList.push_back(PHI); - -  // Record this PHI. -  BasicBlock *BB = PHI->getParent(); -  AvailableVals[BB] = PHI; -  (*BBMap)[BB]->AvailableVal = PHI; - -  while (!WorkList.empty()) { -    PHI = WorkList.pop_back_val(); - -    // Iterate through the PHI's incoming values. 
-    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { -      PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); -      if (!IncomingPHIVal) continue; -      BB = IncomingPHIVal->getParent(); -      BBInfo *Info = (*BBMap)[BB]; -      if (!Info || Info->AvailableVal) -        continue; - -      // Record the PHI and add it to the worklist. -      AvailableVals[BB] = IncomingPHIVal; -      Info->AvailableVal = IncomingPHIVal; -      WorkList.push_back(IncomingPHIVal); -    } +    // If the client wants to know about all new instructions, tell it. +    if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);    } -} -/// ClearPHITags - When one of the existing PHI nodes fails to match, clear -/// the PHITag values that were stored in the BBMap when checking to see if -/// it matched. -void SSAUpdater::ClearPHITags(PHINode *PHI) { -  BBMapTy *BBMap = getBBMap(BM); -  SmallVector<PHINode*, 20> WorkList; -  WorkList.push_back(PHI); - -  // Clear the tag for this PHI. -  (*BBMap)[PHI->getParent()]->PHITag = 0; - -  while (!WorkList.empty()) { -    PHI = WorkList.pop_back_val(); - -    // Iterate through the PHI's incoming values. -    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { -      PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i)); -      if (!IncomingPHIVal) continue; -      BasicBlock *BB = IncomingPHIVal->getParent(); -      BBInfo *Info = (*BBMap)[BB]; -      if (!Info || Info->AvailableVal || !Info->PHITag) -        continue; - -      // Clear the tag and add the PHI to the worklist. -      Info->PHITag = 0; -      WorkList.push_back(IncomingPHIVal); -    } -  } +  return InsertedVal;  } diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp index 16437bc13045..8ad53736c993 100644 --- a/lib/VMCore/BasicBlock.cpp +++ b/lib/VMCore/BasicBlock.cpp @@ -14,6 +14,7 @@  #include "llvm/BasicBlock.h"  #include "llvm/Constants.h"  #include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h"  #include "llvm/LLVMContext.h"  #include "llvm/Type.h"  #include "llvm/ADT/STLExtras.h" @@ -136,6 +137,16 @@ Instruction* BasicBlock::getFirstNonPHI() {    return &*i;  } +Instruction* BasicBlock::getFirstNonPHIOrDbg() { +  BasicBlock::iterator i = begin(); +  // All valid basic blocks should have a terminator, +  // which is not a PHINode. If we have an invalid basic +  // block we'll get an assertion failure when dereferencing +  // a past-the-end iterator. +  while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i; +  return &*i; +} +  void BasicBlock::dropAllReferences() {    for(iterator I = begin(), E = end(); I != E; ++I)      I->dropAllReferences(); diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 44d487a8e2b4..634407ca13ff 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1671,7 +1671,7 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {  void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {    MDNode *Loc = L ? 
unwrap<MDNode>(L) : NULL; -  unwrap(Builder)->SetCurrentDebugLocation(NewDebugLoc::getFromDILocation(Loc)); +  unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));  }  LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) { diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp index f02ce57c3b04..f8b45eed0d5e 100644 --- a/lib/VMCore/DebugLoc.cpp +++ b/lib/VMCore/DebugLoc.cpp @@ -15,7 +15,7 @@ using namespace llvm;  // DebugLoc Implementation  //===----------------------------------------------------------------------===// -MDNode *NewDebugLoc::getScope(const LLVMContext &Ctx) const { +MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {    if (ScopeIdx == 0) return 0;    if (ScopeIdx > 0) { @@ -32,7 +32,7 @@ MDNode *NewDebugLoc::getScope(const LLVMContext &Ctx) const {    return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();  } -MDNode *NewDebugLoc::getInlinedAt(const LLVMContext &Ctx) const { +MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {    // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at    // position specified.  Zero is invalid.    if (ScopeIdx >= 0) return 0; @@ -44,8 +44,8 @@ MDNode *NewDebugLoc::getInlinedAt(const LLVMContext &Ctx) const {  }  /// Return both the Scope and the InlinedAt values. -void NewDebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA, -                                       const LLVMContext &Ctx) const { +void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA, +                                    const LLVMContext &Ctx) const {    if (ScopeIdx == 0) {      Scope = IA = 0;      return; @@ -69,9 +69,9 @@ void NewDebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,  } -NewDebugLoc NewDebugLoc::get(unsigned Line, unsigned Col, -                             MDNode *Scope, MDNode *InlinedAt) { -  NewDebugLoc Result; +DebugLoc DebugLoc::get(unsigned Line, unsigned Col, +                       MDNode *Scope, MDNode *InlinedAt) { +  DebugLoc Result;    // If no scope is available, this is an unknown location.    if (Scope == 0) return Result; @@ -95,7 +95,7 @@ NewDebugLoc NewDebugLoc::get(unsigned Line, unsigned Col,  /// getAsMDNode - This method converts the compressed DebugLoc node into a  /// DILocation compatible MDNode. -MDNode *NewDebugLoc::getAsMDNode(const LLVMContext &Ctx) const { +MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {    if (isUnknown()) return 0;    MDNode *Scope, *IA; @@ -111,12 +111,12 @@ MDNode *NewDebugLoc::getAsMDNode(const LLVMContext &Ctx) const {    return MDNode::get(Ctx2, &Elts[0], 4);  } -/// getFromDILocation - Translate the DILocation quad into a NewDebugLoc. -NewDebugLoc NewDebugLoc::getFromDILocation(MDNode *N) { -  if (N == 0 || N->getNumOperands() != 4) return NewDebugLoc(); +/// getFromDILocation - Translate the DILocation quad into a DebugLoc. 
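// Illustrative round trip through the renamed API (a sketch, not part of this
// patch; Ctx, ScopeMD, Line and Col stand for values the caller already has):
static DebugLoc roundTripExample(LLVMContext &Ctx, MDNode *ScopeMD,
                                 unsigned Line, unsigned Col) {
  DebugLoc DL = DebugLoc::get(Line, Col, ScopeMD, /*InlinedAt=*/0);
  MDNode *Quad = DL.getAsMDNode(Ctx);         // the (line, col, scope, inlinedAt) quad
  return DebugLoc::getFromDILocation(Quad);   // and back to the compressed form
}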
+DebugLoc DebugLoc::getFromDILocation(MDNode *N) { +  if (N == 0 || N->getNumOperands() != 4) return DebugLoc();    MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(2)); -  if (Scope == 0) return NewDebugLoc(); +  if (Scope == 0) return DebugLoc();    unsigned LineNo = 0, ColNo = 0;    if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(0))) diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 73e60912e43b..72de0321c3aa 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -425,7 +425,7 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const {  }  void Instruction::setDbgMetadata(MDNode *Node) { -  DbgLoc = NewDebugLoc::getFromDILocation(Node); +  DbgLoc = DebugLoc::getFromDILocation(Node);  }  /// setMetadata - Set the metadata of of the specified kind to the specified @@ -436,7 +436,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {    // Handle 'dbg' as a special case since it is not stored in the hash table.    if (KindID == LLVMContext::MD_dbg) { -    DbgLoc = NewDebugLoc::getFromDILocation(Node); +    DbgLoc = DebugLoc::getFromDILocation(Node);      return;    } @@ -549,7 +549,7 @@ getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,  /// removeAllMetadata - Remove all metadata from this instruction.  void Instruction::removeAllMetadata() {    assert(hasMetadata() && "Caller should check"); -  DbgLoc = NewDebugLoc(); +  DbgLoc = DebugLoc();    if (hasMetadataHashEntry()) {      getContext().pImpl->MetadataStore.erase(this);      setHasMetadataHashEntry(false); diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index a782e5a82e91..609375012480 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -18,6 +18,7 @@  #include "llvm/Module.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/StringMap.h" +#include "llvm/Assembly/PrintModulePass.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ManagedStatic.h"  #include "llvm/Support/PassNameParser.h" @@ -42,6 +43,11 @@ Pass::~Pass() {  // Force out-of-line virtual method.  ModulePass::~ModulePass() { } +Pass *ModulePass::createPrinterPass(raw_ostream &O, +                                    const std::string &Banner) const { +  return createPrintModulePass(&O, false, Banner); +} +  PassManagerType ModulePass::getPotentialPassManagerType() const {    return PMT_ModulePassManager;  } @@ -113,6 +119,11 @@ void ImmutablePass::initializePass() {  // FunctionPass Implementation  // +Pass *FunctionPass::createPrinterPass(raw_ostream &O, +                                      const std::string &Banner) const { +  return createPrintFunctionPass(Banner, &O); +} +  // run - On a module, we run this pass by initializing, runOnFunction'ing once  // for every function in the module, then by finalizing.  // @@ -155,6 +166,13 @@ PassManagerType FunctionPass::getPotentialPassManagerType() const {  // BasicBlockPass Implementation  // +Pass *BasicBlockPass::createPrinterPass(raw_ostream &O, +                                        const std::string &Banner) const { +   +  llvm_unreachable("BasicBlockPass printing unsupported."); +  return 0; +} +  // To run this pass on a function, we simply call runOnBasicBlock once for each  // function.  
// diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 6774cecdcf24..6ca35ac0260f 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -13,6 +13,7 @@  #include "llvm/PassManagers.h" +#include "llvm/Assembly/PrintModulePass.h"  #include "llvm/Assembly/Writer.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" @@ -20,6 +21,7 @@  #include "llvm/Module.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PassNameParser.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/System/Mutex.h"  #include "llvm/System/Threading.h" @@ -55,6 +57,57 @@ PassDebugging("debug-pass", cl::Hidden,    clEnumVal(Executions, "print pass name before it is executed"),    clEnumVal(Details   , "print pass details when it is executed"),                               clEnumValEnd)); + +typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser> +PassOptionList; + +// Print IR out before/after specified passes. +static PassOptionList +PrintBefore("print-before", +            llvm::cl::desc("Print IR before specified passes")); + +static PassOptionList +PrintAfter("print-after", +           llvm::cl::desc("Print IR after specified passes")); + +static cl::opt<bool> +PrintBeforeAll("print-before-all", +               llvm::cl::desc("Print IR before each pass"), +               cl::init(false)); +static cl::opt<bool> +PrintAfterAll("print-after-all", +              llvm::cl::desc("Print IR after each pass"), +              cl::init(false)); + +/// This is a helper to determine whether to print IR before or +/// after a pass. + +static bool ShouldPrintBeforeOrAfterPass(Pass *P, +                                         PassOptionList &PassesToPrint) { +  for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) { +    const llvm::PassInfo *PassInf = PassesToPrint[i]; +    if (PassInf && P->getPassInfo()) +      if (PassInf->getPassArgument() == +          P->getPassInfo()->getPassArgument()) { +        return true; +      } +  } +  return false; +} +   + +/// This is a utility to check whether a pass should have IR dumped +/// before it. +static bool ShouldPrintBeforePass(Pass *P) { +  return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(P, PrintBefore); +} + +/// This is a utility to check whether a pass should have IR dumped +/// after it. +static bool ShouldPrintAfterPass(Pass *P) { +  return PrintAfterAll || ShouldPrintBeforeOrAfterPass(P, PrintAfter); +} +  } // End of llvm namespace  /// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions @@ -182,6 +235,11 @@ public:      schedulePass(P);    } +  /// createPrinterPass - Get a function printer pass.  +  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { +    return createPrintFunctionPass(Banner, &O); +  } +    // Prepare for running an on the fly pass, freeing memory if needed    // from a previous run.    void releaseMemoryOnTheFly(); @@ -252,6 +310,11 @@ public:      }    } +  /// createPrinterPass - Get a module printer pass.  +  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { +    return createPrintModulePass(&O, false, Banner); +  } +    /// run - Execute all of the passes scheduled for execution.  Keep track of    /// whether any of the passes modifies the module, and if so, return true.    bool runOnModule(Module &M); @@ -331,6 +394,11 @@ public:      schedulePass(P);    } +  /// createPrinterPass - Get a module printer pass.  
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { +    return createPrintModulePass(&O, false, Banner); +  } +    /// run - Execute all of the passes scheduled for execution.  Keep track of    /// whether any of the passes modifies the module, and if so, return true.    bool run(Module &M); @@ -1208,7 +1276,14 @@ FunctionPassManager::~FunctionPassManager() {  /// there is no need to delete the pass. (TODO delete passes.)  /// This implies that all passes MUST be allocated with 'new'.  void FunctionPassManager::add(Pass *P) {  +  if (ShouldPrintBeforePass(P)) +    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") +                             + P->getPassName() + " ***"));    FPM->add(P); + +  if (ShouldPrintAfterPass(P)) +    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") +                             + P->getPassName() + " ***"));  }  /// run - Execute all of the passes scheduled for execution.  Keep @@ -1519,7 +1594,15 @@ PassManager::~PassManager() {  /// will be destroyed as well, so there is no need to delete the pass.  This  /// implies that all passes MUST be allocated with 'new'.  void PassManager::add(Pass *P) { +  if (ShouldPrintBeforePass(P)) +    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") +                             + P->getPassName() + " ***")); +    PM->add(P); + +  if (ShouldPrintAfterPass(P)) +    add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") +                             + P->getPassName() + " ***"));  }  /// run - Execute all of the passes scheduled for execution.  Keep track of diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp index f0f6e7a9efe7..2d69dce07f3f 100644 --- a/lib/VMCore/PrintModulePass.cpp +++ b/lib/VMCore/PrintModulePass.cpp @@ -23,21 +23,22 @@ using namespace llvm;  namespace {    class PrintModulePass : public ModulePass { +    std::string Banner;      raw_ostream *Out;       // raw_ostream to print on      bool DeleteStream;      // Delete the ostream in our dtor?    public:      static char ID;      PrintModulePass() : ModulePass(&ID), Out(&dbgs()),         DeleteStream(false) {} -    PrintModulePass(raw_ostream *o, bool DS) -      : ModulePass(&ID), Out(o), DeleteStream(DS) {} +    PrintModulePass(const std::string &B, raw_ostream *o, bool DS) +        : ModulePass(&ID), Banner(B), Out(o), DeleteStream(DS) {}      ~PrintModulePass() {        if (DeleteStream) delete Out;      }      bool runOnModule(Module &M) { -      (*Out) << M; +      (*Out) << Banner << M;        return false;      } @@ -85,8 +86,9 @@ Y("print-function","Print function to stderr");  /// createPrintModulePass - Create and return a pass that writes the  /// module to the specified raw_ostream.  
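// Illustrative use of the new Banner argument (a sketch, not part of this
// patch; PM stands for a PassManager the caller has already set up, and the
// banner text is an arbitrary example):
static void addIRDump(PassManager &PM) {
  PM.add(createPrintModulePass(&dbgs(), /*DeleteStream=*/false,
                               "*** IR Dump After Inlining ***"));
}
// With -print-before/-print-after (or -print-before-all/-print-after-all),
// PassManager::add() and FunctionPassManager::add() schedule an equivalent
// printer pass around the requested passes automatically, as shown above.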
ModulePass *llvm::createPrintModulePass(llvm::raw_ostream *OS,  -                                        bool DeleteStream) { -  return new PrintModulePass(OS, DeleteStream); +                                        bool DeleteStream, +                                        const std::string &Banner) { +  return new PrintModulePass(Banner, OS, DeleteStream);  }  /// createPrintFunctionPass - Create and return a pass that prints diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll index 702632cde965..0e1559548e2b 100644 --- a/test/CodeGen/X86/memset-2.ll +++ b/test/CodeGen/X86/memset-2.ll @@ -1,13 +1,11 @@ -; RUN: llc < %s | FileCheck %s - -target triple = "i386" +; RUN: llc -mtriple=i386-apple-darwin < %s | FileCheck %s  declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind  define fastcc void @t1() nounwind {  entry:  ; CHECK: t1: -; CHECK: call memset +; CHECK: call _memset    call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind    unreachable  } @@ -15,7 +13,7 @@ entry:  define fastcc void @t2(i8 signext %c) nounwind {  entry:  ; CHECK: t2: -; CHECK: call memset +; CHECK: call _memset    call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind    unreachable  } diff --git a/test/CodeGen/X86/memset-3.ll b/test/CodeGen/X86/memset-3.ll new file mode 100644 index 000000000000..9b20ad506a5f --- /dev/null +++ b/test/CodeGen/X86/memset-3.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=i386-apple-darwin < %s | not grep memset +; PR6767 + +define void @t() nounwind ssp { +entry: +  %buf = alloca [512 x i8], align 1 +  %ptr = getelementptr inbounds [512 x i8]* %buf, i32 0, i32 0 +  call void @llvm.memset.i32(i8* %ptr, i8 undef, i32 512, i32 1) +  unreachable +} + +declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind diff --git a/test/MC/Disassembler/arm-tests.txt b/test/MC/Disassembler/arm-tests.txt new file mode 100644 index 000000000000..094a2d737246 --- /dev/null +++ b/test/MC/Disassembler/arm-tests.txt @@ -0,0 +1,62 @@ +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s + +# CHECK:	b	#0 +0xfe 0xff 0xff 0xea + +# CHECK:	bfc	r8, #0, #16 +0x1f 0x80 0xcf 0xe7 + +# CHECK:	bfi	r8, r0, #16, #1 +0x10 0x88 0xd0 0xe7 + +# CHECK:	cmn	r0, #1 +0x01 0x00 0x70 0xe3 + +# CHECK:	dmb	nshst +0x56 0xf0 0x7f 0xf5 + +# CHECK:	ldr	r0, [r2], #15 +0x0f 0x00 0x92 0xe4 + +# CHECK:	lsls	r0, r2, #31 +0x82 0x0f 0xb0 0xe1 + +# CHECK:	mcr2	p0, #0, r2, cr1, cr0, #7 +0xf0 0x20 0x01 0xfe + +# CHECK:	movt	r8, #65535 +0xff 0x8f 0x4f 0xe3 + +# CHECK:	pkhbt	r8, r9, r10, lsl #4 +0x1a 0x82 0x89 0xe6 + +# CHECK:	pop	{r0, r2, r4, r6, r8, r10} +0x55 0x05 0xbd 0xe8 + +# CHECK:	push	{r0, r2, r4, r6, r8, r10} +0x55 0x05 0x2d 0xe9 + +# CHECK:	qsax	r8, r9, r10 +0x5a 0x8f 0x29 0xe6 + +# CHECK:	rfedb	r0! 
+0x00 0x0a 0x30 0xf9 + +# CHECK:	sbcs	r0, pc, #1 +0x01 0x00 0xdf 0xe2 + +# CHECK:	sbfx	r0, r1, #0, #8 +0x51 0x00 0xa7 0xe7 + +# CHECK:	ssat	r8, #1, r10, lsl #8 +0x1a 0x84 0xa0 0xe6 + +# CHECK:	stmdb	r10!, {r4, r5, r6, r7, lr} +0xf0 0x40 0x2a 0xe9 + +# CHECK:	teq	r0, #31 +0x1f 0x00 0x30 0xe3 + +# CHECK:	ubfx	r0, r0, #16, #1 +0x50 0x08 0xe0 0xe7 + diff --git a/test/MC/Disassembler/neon-tests.txt b/test/MC/Disassembler/neon-tests.txt new file mode 100644 index 000000000000..5d37b8c6416d --- /dev/null +++ b/test/MC/Disassembler/neon-tests.txt @@ -0,0 +1,41 @@ +# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s + +# CHECK:	vbif	q15, q7, q0 +0x50 0xe1 0x7e 0xf3 + +# CHECK:	vcvt.f32.s32	q15, q0, #1 +0x50 0xee 0xff 0xf2 + +# CHECK:	vdup.32	q3, d1[0] +0x41 0x6c 0xb4 0xf3 + +# CHECK:	vld4.8	{d0, d1, d2, d3}, [r2], r7 +0x07 0x00 0x22 0xf4 + +# CHECK:	vld4.8	{d4, d6, d8, d10}, [r2] +0x0f 0x41 0x22 0xf4 + +# CHECK:	vmov	d0, d15 +0x1f 0x01 0x2f 0xf2 + +# CHECK:	vmul.f32	d0, d0, d6 +0x16 0x0d 0x00 0xf3 + +# CHECK:	vneg.f32	q0, q0 +0xc0 0x07 0xb9 0xf3 + +# CHECK:	vqrdmulh.s32	d0, d0, d3[1] +0x63 0x0d 0xa0 0xf2 + +# CHECK:	vrshr.s32	d0, d0, #16 +0x10 0x02 0xb0 0xf2 + +# CHECK:	vshll.i16	q3, d1, #16 +0x01 0x63 0xb6 0xf3 + +# CHECK:	vsri.32	q15, q0, #1 +0x50 0xe4 0xff 0xf3 + +# CHECK:	vtbx.8	d18, {d4, d5, d6}, d7 +0x47 0x2a 0xf4 0xf3 + diff --git a/test/MC/Disassembler/thumb-tests.txt b/test/MC/Disassembler/thumb-tests.txt new file mode 100644 index 000000000000..e7e6385818ef --- /dev/null +++ b/test/MC/Disassembler/thumb-tests.txt @@ -0,0 +1,81 @@ +# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 | FileCheck %s + +# CHECK:	add	r5, sp, #68 +0x11 0xad + +# CHECK:	adcs	r0, r0, #1 +0x50 0xf1 0x01 0x00 + +# CHECK:	b	#34 +0x0f 0xe0 + +# CHECK:	bfi	r2, r10, #0, #1 +0x6a 0xf3 0x00 0x02 + +# CHECK:	cbnz	r7, #20 +0x57 0xb9 + +# CHECK:	cmp	r3, r4 +0xa3 0x42 + +# CHECK:	cmn.w	r0, #31 +0x10 0xf1 0x1f 0x0f + +# CHECK:	ldmia	r0!, {r1} +0x02 0xc8 + +# CHECK:	ldrd	r0, r1, [r7, #64]! +0xf7 0xe9 0x10 0x01 + +# CHECK:	lsls.w	r0, pc, #1 +0x5f 0xea 0x4f 0x00 + +# CHECK:	mov	r11, r7 +0xbb 0x46 + +# CHECK:	pkhtb	r2, r4, r6, asr #16 +0xc4 0xea 0x26 0x42 + +# CHECK:	pop	{r2, r4, r6, r8, r10, r12} +0xbd 0xe8 0x54 0x15 + +# CHECK:	push	{r2, r4, r6, r8, r10, r12} +0x2d 0xe9 0x54 0x15 + +# CHECK:	rsbs	r0, r0, #0 +0x40 0x42 + +# CHECK:	strd	r0, [r7, #64] +0xc7 0xe9 0x10 0x01 + +# CHECK:	sub	sp, #60 +0x8f 0xb0 + +# CHECK:	subw	r0, pc, #1 +0xaf 0xf2 0x01 0x00 + +# CHECK:	uqadd16	r3, r4, r5 +0x94 0xfa 0x55 0xf3 + +# CHECK:	usada8	r5, r4, r3, r2 +0x74 0xfb 0x03 0x25 + +# CHECK:	uxtab16	r1, r2, r3, ror #8 +0x32 0xfa 0x93 0xf1 + +# IT block begin +# CHECK:	ittte	eq +0x03 0xbf + +# CHECK:	moveq	r3, #3 +0x03 0x23 + +# CHECK:	asreq	r1, r0, #5 +0x41 0x11 + +# CHECK:	lsleq	r1, r0, #28 +0x01 0x07 + +# CHECK:	rsbne	r1, r2, #0 +0x51 0x42 +# IT block end diff --git a/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll b/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll deleted file mode 100644 index 066e3038b087..000000000000 --- a/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: opt < %s -gvn -enable-full-load-pre -S | FileCheck %s - -define i8* @cat(i8* %s1, ...) 
nounwind { -entry: -  br i1 undef, label %bb, label %bb3 - -bb:                                               ; preds = %entry -  unreachable - -bb3:                                              ; preds = %entry -  store i8* undef, i8** undef, align 4 -  br i1 undef, label %bb5, label %bb6 - -bb5:                                              ; preds = %bb3 -  unreachable - -bb6:                                              ; preds = %bb3 -  br label %bb12 - -bb8:                                              ; preds = %bb12 -  br i1 undef, label %bb9, label %bb10 - -bb9:                                              ; preds = %bb8 -  %0 = load i8** undef, align 4                   ; <i8*> [#uses=0] -  %1 = load i8** undef, align 4                   ; <i8*> [#uses=0] -  br label %bb11 - -bb10:                                             ; preds = %bb8 -  br label %bb11 - -bb11:                                             ; preds = %bb10, %bb9 -; CHECK: bb11: -; CHECK: phi -; CHECK-NOT: phi -  br label %bb12 - -bb12:                                             ; preds = %bb11, %bb6 -; CHECK: bb12: -; CHECK: phi -; CHECK-NOT: phi -  br i1 undef, label %bb8, label %bb13 - -bb13:                                             ; preds = %bb12 -; CHECK: bb13: -  ret i8* undef -} diff --git a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll b/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll deleted file mode 100644 index 311d3daa8f32..000000000000 --- a/test/Transforms/IndVarSimplify/2008-11-17-Floating.ll +++ /dev/null @@ -1,35 +0,0 @@ -; RUN: opt < %s -indvars -S | grep icmp | count 2 -; RUN: opt < %s -indvars -S | grep sitofp | count 1 -; RUN: opt < %s -indvars -S | grep uitofp | count 1 - -define void @bar() nounwind { -entry: -	br label %bb - -bb:		; preds = %bb, %entry -	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2] -	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0] -	%1 = fadd double %x.0.reg2mem.0, 1.0e+0		; <double> [#uses=2] -	%2 = fcmp olt double %1, 2147483646.0e+0		; <i1> [#uses=1] -	br i1 %2, label %bb, label %return - -return:		; preds = %bb -	ret void -} - -define void @bar1() nounwind { -entry: -	br label %bb - -bb:		; preds = %bb, %entry -	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2] -	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0] -	%1 = fadd double %x.0.reg2mem.0, 1.0e+0		; <double> [#uses=2] -	%2 = fcmp olt double %1, 2147483647.0e+0		; <i1> [#uses=1] -	br i1 %2, label %bb, label %return - -return:		; preds = %bb -	ret void -} - -declare i32 @foo(double) diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll new file mode 100644 index 000000000000..14f79fefb180 --- /dev/null +++ b/test/Transforms/IndVarSimplify/crash.ll @@ -0,0 +1,19 @@ +; RUN: opt -indvars %s -disable-output + +declare i32 @putchar(i8) nounwind + +define void @t2(i1* %P) nounwind { +; <label>:0 +  br label %1 + +; <label>:1                                       ; preds = %1, %0 +  %2 = phi double [ 9.000000e+00, %0 ], [ %4, %1 ] ; <double> [#uses=1] +  %3 = tail call i32 @putchar(i8 72)              ; <i32> [#uses=0] +  %4 = fadd double %2, -1.000000e+00              ; <double> [#uses=2] +  %5 = fcmp ult double %4, 0.000000e+00           ; <i1> [#uses=1] +  store i1 %5, i1* %P +  br i1 %5, label %6, label %1 + +; <label>:6                                       ; preds = %1 +  ret void +}
\ No newline at end of file diff --git a/test/Transforms/IndVarSimplify/dangling-use.ll b/test/Transforms/IndVarSimplify/dangling-use.ll new file mode 100644 index 000000000000..51c31204c6df --- /dev/null +++ b/test/Transforms/IndVarSimplify/dangling-use.ll @@ -0,0 +1,41 @@ +; RUN: opt -indvars -disable-output < %s  + +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i8:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +target triple = "powerpc-apple-darwin11" + +define void @vec_inverse_5_7_vert_loop_copyseparate(i8* %x, i32 %n, i32 %rowbytes) nounwind { +entry: +  %tmp1 = sdiv i32 %n, 3                          ; <i32> [#uses=1] +  %tmp2 = sdiv i32 %rowbytes, 5                   ; <i32> [#uses=2] +  br label %bb49 + +bb49:                                             ; preds = %bb48, %entry +  %x_addr.0 = phi i8* [ %x, %entry ], [ %tmp481, %bb48 ] ; <i8*> [#uses=2] +  br label %bb10 + +bb10:                                             ; preds = %bb49 +  %tmp326 = mul nsw i32 %tmp1, %tmp2              ; <i32> [#uses=1] +  %tmp351 = getelementptr inbounds i8* %x_addr.0, i32 %tmp326 ; <i8*> [#uses=1] +  br i1 false, label %bb.nph, label %bb48 + +bb.nph:                                           ; preds = %bb10 +  br label %bb23 + +bb23:                                             ; preds = %bb28, %bb.nph +  %pOriginHi.01 = phi i8* [ %tmp351, %bb.nph ], [ %pOriginHi.0, %bb28 ] ; <i8*> [#uses=2] +  %tmp378 = bitcast i8* %pOriginHi.01 to i8*      ; <i8*> [#uses=1] +  store i8* %tmp378, i8** null +  %tmp385 = getelementptr inbounds i8* %pOriginHi.01, i32 %tmp2 ; <i8*> [#uses=1] +  br label %bb28 + +bb28:                                             ; preds = %bb23 +  %pOriginHi.0 = phi i8* [ %tmp385, %bb23 ]       ; <i8*> [#uses=1] +  br i1 false, label %bb23, label %bb28.bb48_crit_edge + +bb28.bb48_crit_edge:                              ; preds = %bb28 +  br label %bb48 + +bb48:                                             ; preds = %bb28.bb48_crit_edge, %bb10 +  %tmp481 = getelementptr inbounds i8* %x_addr.0, i32 1 ; <i8*> [#uses=1] +  br label %bb49 +} diff --git a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll b/test/Transforms/IndVarSimplify/floating-point-iv.ll index 7b4032b2eba9..8f4b87048a4d 100644 --- a/test/Transforms/IndVarSimplify/2008-11-03-Floating.ll +++ b/test/Transforms/IndVarSimplify/floating-point-iv.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -indvars -S | grep icmp | count 4 -define void @bar() nounwind { +; RUN: opt < %s -indvars -S | FileCheck %s +define void @test1() nounwind {  entry:  	br label %bb @@ -12,11 +12,13 @@ bb:		; preds = %bb, %entry  return:		; preds = %bb  	ret void +; CHECK: @test1 +; CHECK: icmp  }  declare i32 @foo(double) -define void @bar2() nounwind { +define void @test2() nounwind {  entry:  	br label %bb @@ -29,25 +31,29 @@ bb:		; preds = %bb, %entry  return:		; preds = %bb  	ret void +; CHECK: @test2 +; CHECK: icmp  } -define void @bar3() nounwind { +define void @test3() nounwind {  entry:  	br label %bb  bb:		; preds = %bb, %entry -	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ]		; <double> [#uses=2] -	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind		; <i32> [#uses=0] -	%1 = fadd double %x.0.reg2mem.0, 1.000000e+00		; <double> [#uses=2] -	%2 = fcmp olt double %1, -1.000000e+00		; <i1> [#uses=1] +	%x.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] +	%0 = tail call i32 @foo(double %x.0.reg2mem.0) nounwind +	%1 = fadd double %x.0.reg2mem.0, 1.000000e+00 +	%2 = fcmp olt 
double %1, -1.000000e+00  	br i1 %2, label %bb, label %return -return:		; preds = %bb +return:  	ret void +; CHECK: @test3 +; CHECK: fcmp  } -define void @bar4() nounwind { +define void @test4() nounwind {  entry:  	br label %bb @@ -58,8 +64,29 @@ bb:		; preds = %bb, %entry  	%2 = fcmp olt double %1, 1.000000e+00		; <i1> [#uses=1]  	br i1 %2, label %bb, label %return -return:		; preds = %bb +return:  	ret void +; CHECK: @test4 +; CHECK: fcmp  } +; PR6761 +define void @test5() nounwind { +; <label>:0 +  br label %1 + +; <label>:1                                       ; preds = %1, %0 +  %2 = phi double [ 9.000000e+00, %0 ], [ %4, %1 ] ; <double> [#uses=1] +  %3 = tail call i32 @foo(double 0.0)              ; <i32> [#uses=0] +  %4 = fadd double %2, -1.000000e+00              ; <double> [#uses=2] +  %5 = fcmp ult double %4, 0.000000e+00           ; <i1> [#uses=1] +  br i1 %5, label %exit, label %1 + +exit: +  ret void +   +; CHECK: @test5 +; CHECK: icmp eq i32 {{.*}}, 10 +; CHECK-NEXT: br i1 +} diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 6d5b2b51a8b7..732ff11fe46d 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -237,6 +237,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,      case bitc::FUNC_CODE_INST_INSERTVAL:    return "INST_INSERTVAL";      case bitc::FUNC_CODE_INST_CMP2:         return "INST_CMP2";      case bitc::FUNC_CODE_INST_VSELECT:      return "INST_VSELECT"; +    case bitc::FUNC_CODE_DEBUG_LOC:         return "DEBUG_LOC"; +    case bitc::FUNC_CODE_DEBUG_LOC_AGAIN:   return "DEBUG_LOC_AGAIN";      }    case bitc::TYPE_SYMTAB_BLOCK_ID:      switch (CodeID) { @@ -259,9 +261,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,      default:return 0;      case bitc::METADATA_STRING:      return "MDSTRING";      case bitc::METADATA_NODE:        return "MDNODE"; +    case bitc::METADATA_FN_NODE:     return "FN_MDNODE";      case bitc::METADATA_NAME:        return "METADATA_NAME";      case bitc::METADATA_NAMED_NODE:  return "NAMEDMDNODE";      case bitc::METADATA_KIND:        return "METADATA_KIND"; +    case bitc::METADATA_ATTACHMENT:  return "METADATA_ATTACHMENT";      }    }  } diff --git a/tools/llvmc/plugins/Base/Base.td.in b/tools/llvmc/plugins/Base/Base.td.in index 3ad07c0d83a1..3c4f4e959539 100644 --- a/tools/llvmc/plugins/Base/Base.td.in +++ b/tools/llvmc/plugins/Base/Base.td.in @@ -46,6 +46,8 @@ def OptList : OptionList<[      (help "Relocation model: PIC"), (hidden)),   (switch_option "mdynamic-no-pic",      (help "Relocation model: dynamic-no-pic"), (hidden)), + (switch_option "shared", +     (help "Create a DLL instead of the regular executable")),   (parameter_option "linker",      (help "Choose linker (possible values: gcc, g++)")),   (parameter_option "mtune", @@ -281,6 +283,7 @@ class llvm_gcc_based_linker <string cmd_prefix> : Tool<            (not_empty "l"), (forward "l"),            (not_empty "Xlinker"), (forward "Xlinker"),            (not_empty "Wl,"), (forward "Wl,"), +          (switch_on "shared"), (forward "shared"),            (switch_on "dynamiclib"), (forward "dynamiclib"),            (switch_on "prebind"), (forward "prebind"),            (switch_on "dead_strip"), (forward "dead_strip"), diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp new file mode 100644 index 000000000000..5fb8b6bfb232 --- /dev/null +++ b/utils/TableGen/ARMDecoderEmitter.cpp @@ -0,0 +1,1862 
@@ +//===------------ ARMDecoderEmitter.cpp - Decoder Generator ---------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the ARM Disassembler. +// It contains the tablegen backend that emits the decoder functions for ARM and +// Thumb.  The disassembler core includes the auto-generated file, invokes the +// decoder functions, and builds up the MCInst based on the decoded Opcode. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-decoder-emitter" + +#include "ARMDecoderEmitter.h" +#include "CodeGenTarget.h" +#include "Record.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <vector> +#include <map> +#include <string> + +using namespace llvm; + +///////////////////////////////////////////////////// +//                                                 // +//  Enums and Utilities for ARM Instruction Format // +//                                                 // +///////////////////////////////////////////////////// + +#define ARM_FORMATS                   \ +  ENTRY(ARM_FORMAT_PSEUDO,         0) \ +  ENTRY(ARM_FORMAT_MULFRM,         1) \ +  ENTRY(ARM_FORMAT_BRFRM,          2) \ +  ENTRY(ARM_FORMAT_BRMISCFRM,      3) \ +  ENTRY(ARM_FORMAT_DPFRM,          4) \ +  ENTRY(ARM_FORMAT_DPSOREGFRM,     5) \ +  ENTRY(ARM_FORMAT_LDFRM,          6) \ +  ENTRY(ARM_FORMAT_STFRM,          7) \ +  ENTRY(ARM_FORMAT_LDMISCFRM,      8) \ +  ENTRY(ARM_FORMAT_STMISCFRM,      9) \ +  ENTRY(ARM_FORMAT_LDSTMULFRM,    10) \ +  ENTRY(ARM_FORMAT_LDSTEXFRM,     11) \ +  ENTRY(ARM_FORMAT_ARITHMISCFRM,  12) \ +  ENTRY(ARM_FORMAT_EXTFRM,        13) \ +  ENTRY(ARM_FORMAT_VFPUNARYFRM,   14) \ +  ENTRY(ARM_FORMAT_VFPBINARYFRM,  15) \ +  ENTRY(ARM_FORMAT_VFPCONV1FRM,   16) \ +  ENTRY(ARM_FORMAT_VFPCONV2FRM,   17) \ +  ENTRY(ARM_FORMAT_VFPCONV3FRM,   18) \ +  ENTRY(ARM_FORMAT_VFPCONV4FRM,   19) \ +  ENTRY(ARM_FORMAT_VFPCONV5FRM,   20) \ +  ENTRY(ARM_FORMAT_VFPLDSTFRM,    21) \ +  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \ +  ENTRY(ARM_FORMAT_VFPMISCFRM,    23) \ +  ENTRY(ARM_FORMAT_THUMBFRM,      24) \ +  ENTRY(ARM_FORMAT_NEONFRM,       25) \ +  ENTRY(ARM_FORMAT_NEONGETLNFRM,  26) \ +  ENTRY(ARM_FORMAT_NEONSETLNFRM,  27) \ +  ENTRY(ARM_FORMAT_NEONDUPFRM,    28) \ +  ENTRY(ARM_FORMAT_MISCFRM,       29) \ +  ENTRY(ARM_FORMAT_THUMBMISCFRM,  30) \ +  ENTRY(ARM_FORMAT_NLdSt,         31) \ +  ENTRY(ARM_FORMAT_N1RegModImm,   32) \ +  ENTRY(ARM_FORMAT_N2Reg,         33) \ +  ENTRY(ARM_FORMAT_NVCVT,         34) \ +  ENTRY(ARM_FORMAT_NVecDupLn,     35) \ +  ENTRY(ARM_FORMAT_N2RegVecShL,   36) \ +  ENTRY(ARM_FORMAT_N2RegVecShR,   37) \ +  ENTRY(ARM_FORMAT_N3Reg,         38) \ +  ENTRY(ARM_FORMAT_N3RegVecSh,    39) \ +  ENTRY(ARM_FORMAT_NVecExtract,   40) \ +  ENTRY(ARM_FORMAT_NVecMulScalar, 41) \ +  ENTRY(ARM_FORMAT_NVTBL,         42) + +// ARM instruction format specifies the encoding used by the instruction. +#define ENTRY(n, v) n = v, +typedef enum { +  ARM_FORMATS +  ARM_FORMAT_NA +} ARMFormat; +#undef ENTRY + +// Converts enum to const char*. 
+static const char *stringForARMFormat(ARMFormat form) { +#define ENTRY(n, v) case n: return #n; +  switch(form) { +    ARM_FORMATS +  case ARM_FORMAT_NA: +  default: +    return ""; +  } +#undef ENTRY +} + +enum { +  IndexModeNone = 0, +  IndexModePre  = 1, +  IndexModePost = 2, +  IndexModeUpd  = 3 +}; + +///////////////////////// +//                     // +//  Utility functions  // +//                     // +///////////////////////// + +/// byteFromBitsInit - Return the byte value from a BitsInit. +/// Called from getByteField(). +static uint8_t byteFromBitsInit(BitsInit &init) { +  int width = init.getNumBits(); + +  assert(width <= 8 && "Field is too large for uint8_t!"); + +  int index; +  uint8_t mask = 0x01; + +  uint8_t ret = 0; + +  for (index = 0; index < width; index++) { +    if (static_cast<BitInit*>(init.getBit(index))->getValue()) +      ret |= mask; + +    mask <<= 1; +  } + +  return ret; +} + +static uint8_t getByteField(const Record &def, const char *str) { +  BitsInit *bits = def.getValueAsBitsInit(str); +  return byteFromBitsInit(*bits); +} + +static BitsInit &getBitsField(const Record &def, const char *str) { +  BitsInit *bits = def.getValueAsBitsInit(str); +  return *bits; +} + +/// sameStringExceptSuffix - Return true if the two strings differ only in RHS's +/// suffix.  ("VST4d8", "VST4d8_UPD", "_UPD") as input returns true. +static +bool sameStringExceptSuffix(const StringRef LHS, const StringRef RHS, +                            const StringRef Suffix) { + +  if (RHS.startswith(LHS) && RHS.endswith(Suffix)) +    return RHS.size() == LHS.size() + Suffix.size(); + +  return false; +} + +/// thumbInstruction - Determine whether we have a Thumb instruction. +/// See also ARMInstrFormats.td. +static bool thumbInstruction(uint8_t Form) { +  return Form == ARM_FORMAT_THUMBFRM; +} + +// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system +// for a bit value. +// +// BIT_UNFILTERED is used as the init value for a filter position.  It is used +// only for filter processings. +typedef enum { +  BIT_TRUE,      // '1' +  BIT_FALSE,     // '0' +  BIT_UNSET,     // '?' +  BIT_UNFILTERED // unfiltered +} bit_value_t; + +static bool ValueSet(bit_value_t V) { +  return (V == BIT_TRUE || V == BIT_FALSE); +} +static bool ValueNotSet(bit_value_t V) { +  return (V == BIT_UNSET); +} +static int Value(bit_value_t V) { +  return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1); +} +static bit_value_t bitFromBits(BitsInit &bits, unsigned index) { +  if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index))) +    return bit->getValue() ? BIT_TRUE : BIT_FALSE; + +  // The bit is uninitialized. +  return BIT_UNSET; +} +// Prints the bit value for each position. +static void dumpBits(raw_ostream &o, BitsInit &bits) { +  unsigned index; + +  for (index = bits.getNumBits(); index > 0; index--) { +    switch (bitFromBits(bits, index - 1)) { +    case BIT_TRUE: +      o << "1"; +      break; +    case BIT_FALSE: +      o << "0"; +      break; +    case BIT_UNSET: +      o << "_"; +      break; +    default: +      assert(0 && "unexpected return value from bitFromBits"); +    } +  } +} + +// Enums for the available target names. +typedef enum { +  TARGET_ARM = 0, +  TARGET_THUMB +} TARGET_NAME_t; + +// FIXME: Possibly auto-detected? +#define BIT_WIDTH 32 + +// Forward declaration. +class FilterChooser; + +// Representation of the instruction to work on. 
+typedef bit_value_t insn_t[BIT_WIDTH]; + +/// Filter - Filter works with FilterChooser to produce the decoding tree for +/// the ISA. +/// +/// It is useful to think of a Filter as governing the switch stmts of the +/// decoding tree in a certain level.  Each case stmt delegates to an inferior +/// FilterChooser to decide what further decoding logic to employ, or in another +/// words, what other remaining bits to look at.  The FilterChooser eventually +/// chooses a best Filter to do its job. +/// +/// This recursive scheme ends when the number of Opcodes assigned to the +/// FilterChooser becomes 1 or if there is a conflict.  A conflict happens when +/// the Filter/FilterChooser combo does not know how to distinguish among the +/// Opcodes assigned. +/// +/// An example of a conflcit is  +/// +/// Conflict: +///                     111101000.00........00010000.... +///                     111101000.00........0001........ +///                     1111010...00........0001........ +///                     1111010...00.................... +///                     1111010......................... +///                     1111............................ +///                     ................................ +///     VST4q8a         111101000_00________00010000____ +///     VST4q8b         111101000_00________00010000____ +/// +/// The Debug output shows the path that the decoding tree follows to reach the +/// the conclusion that there is a conflict.  VST4q8a is a vst4 to double-spaced +/// even registers, while VST4q8b is a vst4 to double-spaced odd regsisters. +/// +/// The encoding info in the .td files does not specify this meta information, +/// which could have been used by the decoder to resolve the conflict.  The +/// decoder could try to decode the even/odd register numbering and assign to +/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a" +/// version and return the Opcode since the two have the same Asm format string. +class Filter { +protected: +  FilterChooser *Owner; // points to the FilterChooser who owns this filter +  unsigned StartBit; // the starting bit position +  unsigned NumBits; // number of bits to filter +  bool Mixed; // a mixed region contains both set and unset bits + +  // Map of well-known segment value to the set of uid's with that value.  +  std::map<uint64_t, std::vector<unsigned> > FilteredInstructions; + +  // Set of uid's with non-constant segment values. +  std::vector<unsigned> VariableInstructions; + +  // Map of well-known segment value to its delegate. +  std::map<unsigned, FilterChooser*> FilterChooserMap; + +  // Number of instructions which fall under FilteredInstructions category. +  unsigned NumFiltered; + +  // Keeps track of the last opcode in the filtered bucket. +  unsigned LastOpcFiltered; + +  // Number of instructions which fall under VariableInstructions category. +  unsigned NumVariable; + +public: +  unsigned getNumFiltered() { return NumFiltered; } +  unsigned getNumVariable() { return NumVariable; } +  unsigned getSingletonOpc() { +    assert(NumFiltered == 1); +    return LastOpcFiltered; +  } +  // Return the filter chooser for the group of instructions without constant +  // segment values. 
+  FilterChooser &getVariableFC() { +    assert(NumFiltered == 1); +    assert(FilterChooserMap.size() == 1); +    return *(FilterChooserMap.find((unsigned)-1)->second); +  } + +  Filter(const Filter &f); +  Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed); + +  ~Filter(); + +  // Divides the decoding task into sub tasks and delegates them to the +  // inferior FilterChooser's. +  // +  // A special case arises when there's only one entry in the filtered +  // instructions.  In order to unambiguously decode the singleton, we need to +  // match the remaining undecoded encoding bits against the singleton. +  void recurse(); + +  // Emit code to decode instructions given a segment or segments of bits. +  void emit(raw_ostream &o, unsigned &Indentation); + +  // Returns the number of fanout produced by the filter.  More fanout implies +  // the filter distinguishes more categories of instructions. +  unsigned usefulness() const; +}; // End of class Filter + +// These are states of our finite state machines used in FilterChooser's +// filterProcessor() which produces the filter candidates to use. +typedef enum { +  ATTR_NONE, +  ATTR_FILTERED, +  ATTR_ALL_SET, +  ATTR_ALL_UNSET, +  ATTR_MIXED +} bitAttr_t; + +/// FilterChooser - FilterChooser chooses the best filter among a set of Filters +/// in order to perform the decoding of instructions at the current level. +/// +/// Decoding proceeds from the top down.  Based on the well-known encoding bits +/// of instructions available, FilterChooser builds up the possible Filters that +/// can further the task of decoding by distinguishing among the remaining +/// candidate instructions. +/// +/// Once a filter has been chosen, it is called upon to divide the decoding task +/// into sub-tasks and delegates them to its inferior FilterChoosers for further +/// processings. +/// +/// It is useful to think of a Filter as governing the switch stmts of the +/// decoding tree.  And each case is delegated to an inferior FilterChooser to +/// decide what further remaining bits to look at. +class FilterChooser { +  static TARGET_NAME_t TargetName; + +protected: +  friend class Filter; + +  // Vector of codegen instructions to choose our filter. +  const std::vector<const CodeGenInstruction*> &AllInstructions; + +  // Vector of uid's for this filter chooser to work on. +  const std::vector<unsigned> Opcodes; + +  // Vector of candidate filters. +  std::vector<Filter> Filters; + +  // Array of bit values passed down from our parent. +  // Set to all BIT_UNFILTERED's for Parent == NULL. +  bit_value_t FilterBitValues[BIT_WIDTH]; + +  // Links to the FilterChooser above us in the decoding tree. +  FilterChooser *Parent; +   +  // Index of the best filter from Filters. 
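// A rough guess at how BestIndex (declared just below) ends up being chosen --
// the actual policy lives in filterProcessor()/doFilter() further down, but the
// idea is to prefer the candidate filter with the largest fanout, e.g.:
//
//   unsigned BestScore = 0;
//   for (unsigned i = 0, e = Filters.size(); i != e; ++i)
//     if (Filters[i].usefulness() > BestScore) {
//       BestScore = Filters[i].usefulness();
//       BestIndex = (int)i;
//     }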
+  int BestIndex; + +public: +  static void setTargetName(TARGET_NAME_t tn) { TargetName = tn; } + +  FilterChooser(const FilterChooser &FC) : +      AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes), +      Filters(FC.Filters), Parent(FC.Parent), BestIndex(FC.BestIndex) { +    memcpy(FilterBitValues, FC.FilterBitValues, sizeof(FilterBitValues)); +  } + +  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts, +                const std::vector<unsigned> &IDs) : +      AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(NULL), +      BestIndex(-1) { +    for (unsigned i = 0; i < BIT_WIDTH; ++i) +      FilterBitValues[i] = BIT_UNFILTERED; + +    doFilter(); +  } + +  FilterChooser(const std::vector<const CodeGenInstruction*> &Insts, +                const std::vector<unsigned> &IDs, +                bit_value_t (&ParentFilterBitValues)[BIT_WIDTH], +                FilterChooser &parent) : +      AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(&parent), +      BestIndex(-1) { +    for (unsigned i = 0; i < BIT_WIDTH; ++i) +      FilterBitValues[i] = ParentFilterBitValues[i]; + +    doFilter(); +  } + +  // The top level filter chooser has NULL as its parent. +  bool isTopLevel() { return Parent == NULL; } + +  // This provides an opportunity for target specific code emission. +  void emitTopHook(raw_ostream &o); + +  // Emit the top level typedef and decodeInstruction() function. +  void emitTop(raw_ostream &o, unsigned &Indentation); + +  // This provides an opportunity for target specific code emission after +  // emitTop(). +  void emitBot(raw_ostream &o, unsigned &Indentation); + +protected: +  // Populates the insn given the uid. +  void insnWithID(insn_t &Insn, unsigned Opcode) const { +    BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst"); + +    for (unsigned i = 0; i < BIT_WIDTH; ++i) +      Insn[i] = bitFromBits(Bits, i); + +    // Set Inst{21} to 1 (wback) when IndexModeBits == IndexModeUpd. +    if (getByteField(*AllInstructions[Opcode]->TheDef, "IndexModeBits") +        == IndexModeUpd) +      Insn[21] = BIT_TRUE; +  } + +  // Returns the record name. +  const std::string &nameWithID(unsigned Opcode) const { +    return AllInstructions[Opcode]->TheDef->getName(); +  } + +  // Populates the field of the insn given the start position and the number of +  // consecutive bits to scan for. +  // +  // Returns false if there exists any uninitialized bit value in the range. +  // Returns true, otherwise. +  bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit, +      unsigned NumBits) const; + +  /// dumpFilterArray - dumpFilterArray prints out debugging info for the given +  /// filter array as a series of chars. +  void dumpFilterArray(raw_ostream &o, bit_value_t (&filter)[BIT_WIDTH]); + +  /// dumpStack - dumpStack traverses the filter chooser chain and calls +  /// dumpFilterArray on each filter chooser up to the top level one. +  void dumpStack(raw_ostream &o, const char *prefix); + +  Filter &bestFilter() { +    assert(BestIndex != -1 && "BestIndex not set"); +    return Filters[BestIndex]; +  } + +  // Called from Filter::recurse() when singleton exists.  For debug purpose. +  void SingletonExists(unsigned Opc); + +  bool PositionFiltered(unsigned i) { +    return ValueSet(FilterBitValues[i]); +  } + +  // Calculates the island(s) needed to decode the instruction. 
+  // This returns a lit of undecoded bits of an instructions, for example, +  // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be +  // decoded bits in order to verify that the instruction matches the Opcode. +  unsigned getIslands(std::vector<unsigned> &StartBits, +      std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals, +      insn_t &Insn); + +  // The purpose of this function is for the API client to detect possible +  // Load/Store Coprocessor instructions.  If the coprocessor number is of +  // the instruction is either 10 or 11, the decoder should not report the +  // instruction as LDC/LDC2/STC/STC2, but should match against Advanced SIMD or +  // VFP instructions. +  bool LdStCopEncoding1(unsigned Opc) { +    const std::string &Name = nameWithID(Opc); +    if (Name == "LDC_OFFSET" || Name == "LDC_OPTION" || +        Name == "LDC_POST" || Name == "LDC_PRE" || +        Name == "LDCL_OFFSET" || Name == "LDCL_OPTION" || +        Name == "LDCL_POST" || Name == "LDCL_PRE" || +        Name == "STC_OFFSET" || Name == "STC_OPTION" || +        Name == "STC_POST" || Name == "STC_PRE" || +        Name == "STCL_OFFSET" || Name == "STCL_OPTION" || +        Name == "STCL_POST" || Name == "STCL_PRE") +      return true; +    else +      return false; +  } + +  // Emits code to decode the singleton.  Return true if we have matched all the +  // well-known bits. +  bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc); + +  // Emits code to decode the singleton, and then to decode the rest. +  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,Filter &Best); + +  // Assign a single filter and run with it. +  void runSingleFilter(FilterChooser &owner, unsigned startBit, unsigned numBit, +      bool mixed); + +  // reportRegion is a helper function for filterProcessor to mark a region as +  // eligible for use as a filter region. +  void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex, +      bool AllowMixed); + +  // FilterProcessor scans the well-known encoding bits of the instructions and +  // builds up a list of candidate filters.  It chooses the best filter and +  // recursively descends down the decoding tree. +  bool filterProcessor(bool AllowMixed, bool Greedy = true); + +  // Decides on the best configuration of filter(s) to use in order to decode +  // the instructions.  A conflict of instructions may occur, in which case we +  // dump the conflict set to the standard error. +  void doFilter(); + +  // Emits code to decode our share of instructions.  Returns true if the +  // emitted code causes a return, which occurs if we know how to decode +  // the instruction at this level or the instruction is not decodeable. 
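// Roughly the shape of the decoder that emit() below generates (an illustrative,
// self-contained approximation -- not actual tblgen output; the generated code
// relies on a fieldFromInstruction() helper provided by the disassembler core
// and returns real ARM/Thumb opcode enums rather than the placeholder here):
#include <stdint.h>

static uint32_t fieldFromInsn(uint32_t insn, unsigned startBit, unsigned numBits) {
  // Extract Inst{startBit+numBits-1 : startBit}; assumes numBits < 32.
  return (insn >> startBit) & ((1u << numBits) - 1);
}

static bool decodeInsn(uint32_t insn, unsigned &Opcode) {
  // Check Inst{27-25} ...
  switch (fieldFromInsn(insn, 25, 3)) {
  default:
    break;               // fallthrough: candidates with variable bits go here
  case 5:
    // Nested switches keep narrowing the set until a single candidate is
    // left; its remaining well-known bits ("islands") are then verified.
    Opcode = 42;         // placeholder for a real opcode value
    return true;
  }
  return false;
}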
+  bool emit(raw_ostream &o, unsigned &Indentation);
+};
+
+///////////////////////////
+//                       //
+// Filter Implementation //
+//                       //
+///////////////////////////
+
+Filter::Filter(const Filter &f) :
+  Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
+  FilteredInstructions(f.FilteredInstructions),
+  VariableInstructions(f.VariableInstructions),
+  FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
+  LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) {
+}
+
+Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
+    bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits),
+                  Mixed(mixed) {
+  assert(StartBit + NumBits - 1 < BIT_WIDTH);
+
+  NumFiltered = 0;
+  LastOpcFiltered = 0;
+  NumVariable = 0;
+
+  for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
+    insn_t Insn;
+
+    // Populates the insn given the uid.
+    Owner->insnWithID(Insn, Owner->Opcodes[i]);
+
+    uint64_t Field;
+    // Scans the segment for possibly well-specified encoding bits.
+    bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);
+
+    if (ok) {
+      // The encoding bits are well-known.  Let's add the uid of the
+      // instruction into the bucket keyed off the constant field value.
+      LastOpcFiltered = Owner->Opcodes[i];
+      FilteredInstructions[Field].push_back(LastOpcFiltered);
+      ++NumFiltered;
+    } else {
+      // Some of the encoding bit(s) are unspecified.  This contributes to
+      // one additional member of "Variable" instructions.
+      VariableInstructions.push_back(Owner->Opcodes[i]);
+      ++NumVariable;
+    }
+  }
+
+  assert((FilteredInstructions.size() + VariableInstructions.size() > 0)
+         && "Filter returns no instruction categories");
+}
+
+Filter::~Filter() {
+  std::map<unsigned, FilterChooser*>::iterator filterIterator;
+  for (filterIterator = FilterChooserMap.begin();
+       filterIterator != FilterChooserMap.end();
+       filterIterator++) {
+    delete filterIterator->second;
+  }
+}
+
+// Divides the decoding task into subtasks and delegates them to the
+// inferior FilterChoosers.
+//
+// A special case arises when there's only one entry in the filtered
+// instructions.  In order to unambiguously decode the singleton, we need to
+// match the remaining undecoded encoding bits against the singleton.
+void Filter::recurse() {
+  std::map<uint64_t, std::vector<unsigned> >::const_iterator mapIterator;
+
+  bit_value_t BitValueArray[BIT_WIDTH];
+  // Starts by inheriting our parent filter chooser's filter bit values.
+  memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
+
+  unsigned bitIndex;
+
+  if (VariableInstructions.size()) {
+    // Conservatively marks each segment position as BIT_UNSET.
+    for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
+      BitValueArray[StartBit + bitIndex] = BIT_UNSET;
+
+    // Delegates to an inferior filter chooser for further processing on this
+    // group of instructions whose segment values are variable.
+    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>( +                              (unsigned)-1, +                              new FilterChooser(Owner->AllInstructions, +                                                VariableInstructions, +                                                BitValueArray, +                                                *Owner) +                              )); +  } + +  // No need to recurse for a singleton filtered instruction. +  // See also Filter::emit(). +  if (getNumFiltered() == 1) { +    //Owner->SingletonExists(LastOpcFiltered); +    assert(FilterChooserMap.size() == 1); +    return; +  } +         +  // Otherwise, create sub choosers. +  for (mapIterator = FilteredInstructions.begin(); +       mapIterator != FilteredInstructions.end(); +       mapIterator++) { + +    // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. +    for (bitIndex = 0; bitIndex < NumBits; bitIndex++) { +      if (mapIterator->first & (1 << bitIndex)) +        BitValueArray[StartBit + bitIndex] = BIT_TRUE; +      else +        BitValueArray[StartBit + bitIndex] = BIT_FALSE; +    } + +    // Delegates to an inferior filter chooser for futher processing on this +    // category of instructions. +    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>( +                              mapIterator->first, +                              new FilterChooser(Owner->AllInstructions, +                                                mapIterator->second, +                                                BitValueArray, +                                                *Owner) +                              )); +  } +} + +// Emit code to decode instructions given a segment or segments of bits. +void Filter::emit(raw_ostream &o, unsigned &Indentation) { +  o.indent(Indentation) << "// Check Inst{"; + +  if (NumBits > 1) +    o << (StartBit + NumBits - 1) << '-'; + +  o << StartBit << "} ...\n"; + +  o.indent(Indentation) << "switch (fieldFromInstruction(insn, " +                        << StartBit << ", " << NumBits << ")) {\n"; + +  std::map<unsigned, FilterChooser*>::iterator filterIterator; + +  bool DefaultCase = false; +  for (filterIterator = FilterChooserMap.begin(); +       filterIterator != FilterChooserMap.end(); +       filterIterator++) { + +    // Field value -1 implies a non-empty set of variable instructions. +    // See also recurse(). +    if (filterIterator->first == (unsigned)-1) { +      DefaultCase = true; + +      o.indent(Indentation) << "default:\n"; +      o.indent(Indentation) << "  break; // fallthrough\n"; + +      // Closing curly brace for the switch statement. +      // This is unconventional because we want the default processing to be +      // performed for the fallthrough cases as well, i.e., when the "cases" +      // did not prove a decoded instruction. +      o.indent(Indentation) << "}\n"; + +    } else +      o.indent(Indentation) << "case " << filterIterator->first << ":\n"; + +    // We arrive at a category of instructions with the same segment value. +    // Now delegate to the sub filter chooser for further decodings. +    // The case may fallthrough, which happens if the remaining well-known +    // encoding bits do not match exactly. +    if (!DefaultCase) { ++Indentation; ++Indentation; } + +    bool finished = filterIterator->second->emit(o, Indentation); +    // For top level default case, there's no need for a break statement. 
+    if (Owner->isTopLevel() && DefaultCase) +      break; +    if (!finished) +      o.indent(Indentation) << "break;\n"; + +    if (!DefaultCase) { --Indentation; --Indentation; } +  } + +  // If there is no default case, we still need to supply a closing brace. +  if (!DefaultCase) { +    // Closing curly brace for the switch statement. +    o.indent(Indentation) << "}\n"; +  } +} + +// Returns the number of fanout produced by the filter.  More fanout implies +// the filter distinguishes more categories of instructions. +unsigned Filter::usefulness() const { +  if (VariableInstructions.size()) +    return FilteredInstructions.size(); +  else +    return FilteredInstructions.size() + 1; +} + +////////////////////////////////// +//                              // +// Filterchooser Implementation // +//                              // +////////////////////////////////// + +// Define the symbol here. +TARGET_NAME_t FilterChooser::TargetName; + +// This provides an opportunity for target specific code emission. +void FilterChooser::emitTopHook(raw_ostream &o) { +  if (TargetName == TARGET_ARM) { +    // Emit code that references the ARMFormat data type. +    o << "static const ARMFormat ARMFormats[] = {\n"; +    for (unsigned i = 0, e = AllInstructions.size(); i != e; ++i) { +      const Record &Def = *(AllInstructions[i]->TheDef); +      const std::string &Name = Def.getName(); +      if (Def.isSubClassOf("InstARM") || Def.isSubClassOf("InstThumb")) +        o.indent(2) <<  +          stringForARMFormat((ARMFormat)getByteField(Def, "Form")); +      else +        o << "  ARM_FORMAT_NA"; + +      o << ",\t// Inst #" << i << " = " << Name << '\n'; +    } +    o << "  ARM_FORMAT_NA\t// Unreachable.\n"; +    o << "};\n\n"; +  } +} + +// Emit the top level typedef and decodeInstruction() function. +void FilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) { +  // Run the target specific emit hook. +  emitTopHook(o); + +  switch (BIT_WIDTH) { +  case 8: +    o.indent(Indentation) << "typedef uint8_t field_t;\n"; +    break; +  case 16: +    o.indent(Indentation) << "typedef uint16_t field_t;\n"; +    break; +  case 32: +    o.indent(Indentation) << "typedef uint32_t field_t;\n"; +    break; +  case 64: +    o.indent(Indentation) << "typedef uint64_t field_t;\n"; +    break; +  default: +    assert(0 && "Unexpected instruction size!"); +  } + +  o << '\n'; + +  o.indent(Indentation) << "static field_t " << +    "fieldFromInstruction(field_t insn, unsigned startBit, unsigned numBits)\n"; + +  o.indent(Indentation) << "{\n"; + +  ++Indentation; ++Indentation; +  o.indent(Indentation) << "assert(startBit + numBits <= " << BIT_WIDTH +                        << " && \"Instruction field out of bounds!\");\n"; +  o << '\n'; +  o.indent(Indentation) << "field_t fieldMask;\n"; +  o << '\n'; +  o.indent(Indentation) << "if (numBits == " << BIT_WIDTH << ")\n"; + +  ++Indentation; ++Indentation; +  o.indent(Indentation) << "fieldMask = (field_t)-1;\n"; +  --Indentation; --Indentation; + +  o.indent(Indentation) << "else\n"; + +  ++Indentation; ++Indentation; +  o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n"; +  --Indentation; --Indentation; + +  o << '\n'; +  o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n"; +  --Indentation; --Indentation; + +  o.indent(Indentation) << "}\n"; + +  o << '\n'; + +  o.indent(Indentation) << "static uint16_t decodeInstruction(field_t insn) {\n"; + +  ++Indentation; ++Indentation; +  // Emits code to decode the instructions. 
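+  //
+  // For illustration only, the generated ARM entry point ends up shaped
+  // roughly like:
+  //
+  //   static uint16_t decodeInstruction(field_t insn) {
+  //     // Check Inst{31-28} ...
+  //     switch (fieldFromInstruction(insn, 28, 4)) {
+  //       ...cases delegating to nested switches or returning opcodes...
+  //     }
+  //     return 0;
+  //   }
+  //
+  // The switch nest comes from the emit() call below; the trailing
+  // "return 0" (no match) is appended right after it.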
+  emit(o, Indentation); + +  o << '\n'; +  o.indent(Indentation) << "return 0;\n"; +  --Indentation; --Indentation; + +  o.indent(Indentation) << "}\n"; + +  o << '\n'; +} + +// This provides an opportunity for target specific code emission after +// emitTop(). +void FilterChooser::emitBot(raw_ostream &o, unsigned &Indentation) { +  if (TargetName != TARGET_THUMB) return; + +  // Emit code that decodes the Thumb ISA. +  o.indent(Indentation) +    << "static uint16_t decodeThumbInstruction(field_t insn) {\n"; + +  ++Indentation; ++Indentation; + +  // Emits code to decode the instructions. +  emit(o, Indentation); + +  o << '\n'; +  o.indent(Indentation) << "return 0;\n"; + +  --Indentation; --Indentation; + +  o.indent(Indentation) << "}\n"; +} + +// Populates the field of the insn given the start position and the number of +// consecutive bits to scan for. +// +// Returns false if and on the first uninitialized bit value encountered. +// Returns true, otherwise. +bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, +    unsigned StartBit, unsigned NumBits) const { +  Field = 0; + +  for (unsigned i = 0; i < NumBits; ++i) { +    if (Insn[StartBit + i] == BIT_UNSET) +      return false; + +    if (Insn[StartBit + i] == BIT_TRUE) +      Field = Field | (1 << i); +  } + +  return true; +} + +/// dumpFilterArray - dumpFilterArray prints out debugging info for the given +/// filter array as a series of chars. +void FilterChooser::dumpFilterArray(raw_ostream &o, +    bit_value_t (&filter)[BIT_WIDTH]) { +  unsigned bitIndex; + +  for (bitIndex = BIT_WIDTH; bitIndex > 0; bitIndex--) { +    switch (filter[bitIndex - 1]) { +    case BIT_UNFILTERED: +      o << "."; +      break; +    case BIT_UNSET: +      o << "_"; +      break; +    case BIT_TRUE: +      o << "1"; +      break; +    case BIT_FALSE: +      o << "0"; +      break; +    } +  } +} + +/// dumpStack - dumpStack traverses the filter chooser chain and calls +/// dumpFilterArray on each filter chooser up to the top level one. +void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) { +  FilterChooser *current = this; + +  while (current) { +    o << prefix; +    dumpFilterArray(o, current->FilterBitValues); +    o << '\n'; +    current = current->Parent; +  } +} + +// Called from Filter::recurse() when singleton exists.  For debug purpose. +void FilterChooser::SingletonExists(unsigned Opc) { +  insn_t Insn0; +  insnWithID(Insn0, Opc); + +  errs() << "Singleton exists: " << nameWithID(Opc) +         << " with its decoding dominating "; +  for (unsigned i = 0; i < Opcodes.size(); ++i) { +    if (Opcodes[i] == Opc) continue; +    errs() << nameWithID(Opcodes[i]) << ' '; +  } +  errs() << '\n'; + +  dumpStack(errs(), "\t\t"); +  for (unsigned i = 0; i < Opcodes.size(); i++) { +    const std::string &Name = nameWithID(Opcodes[i]); + +    errs() << '\t' << Name << " "; +    dumpBits(errs(), +             getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst")); +    errs() << '\n'; +  } +} + +// Calculates the island(s) needed to decode the instruction. +// This returns a list of undecoded bits of an instructions, for example, +// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be +// decoded bits in order to verify that the instruction matches the Opcode. 
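+//
+// As a purely illustrative example (assuming none of these bit positions has
+// been filtered yet): for an instruction whose only remaining well-known bits
+// are Inst{3-0} == 0b1111 and Inst{20} == 1, the function would produce
+// StartBits = {0, 20}, EndBits = {3, 20}, FieldVals = {0xF, 0x1} and return 2.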
+unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
+    std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
+    insn_t &Insn) {
+  unsigned Num, BitNo;
+  Num = BitNo = 0;
+
+  uint64_t FieldVal = 0;
+
+  // 0: Init
+  // 1: Water (the bit value does not affect decoding)
+  // 2: Island (well-known bit value needed for decoding)
+  int State = 0;
+  int Val = -1;
+
+  for (unsigned i = 0; i < BIT_WIDTH; ++i) {
+    Val = Value(Insn[i]);
+    bool Filtered = PositionFiltered(i);
+    switch (State) {
+    default:
+      assert(0 && "Unreachable code!");
+      break;
+    case 0:
+    case 1:
+      if (Filtered || Val == -1)
+        State = 1; // Still in Water
+      else {
+        State = 2; // Into the Island
+        BitNo = 0;
+        StartBits.push_back(i);
+        FieldVal = Val;
+      }
+      break;
+    case 2:
+      if (Filtered || Val == -1) {
+        State = 1; // Into the Water
+        EndBits.push_back(i - 1);
+        FieldVals.push_back(FieldVal);
+        ++Num;
+      } else {
+        State = 2; // Still in Island
+        ++BitNo;
+        FieldVal = FieldVal | Val << BitNo;
+      }
+      break;
+    }
+  }
+  // If we are still in Island after the loop, do some housekeeping.
+  if (State == 2) {
+    EndBits.push_back(BIT_WIDTH - 1);
+    FieldVals.push_back(FieldVal);
+    ++Num;
+  }
+
+  assert(StartBits.size() == Num && EndBits.size() == Num &&
+         FieldVals.size() == Num);
+  return Num;
+}
+
+// Emits code to decode the singleton.  Returns true if we have matched all the
+// well-known bits.
+bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+                                         unsigned Opc) {
+  std::vector<unsigned> StartBits;
+  std::vector<unsigned> EndBits;
+  std::vector<uint64_t> FieldVals;
+  insn_t Insn;
+  insnWithID(Insn, Opc);
+
+  // This provides a good opportunity to check for a possible Ld/St Coprocessor
+  // opcode and escape if the coproc # is either 10 or 11.  It is a NEON/VFP
+  // instruction in disguise.
+  if (TargetName == TARGET_ARM && LdStCopEncoding1(Opc)) {
+    o.indent(Indentation);
+    // A8.6.51 & A8.6.188
+    // If coproc = 0b101?, i.e., slice(insn, 11, 8) = 10 or 11, escape.
+    o << "if (fieldFromInstruction(insn, 9, 3) == 5) break; // fallthrough\n";
+  }
+
+  // Look for islands of undecoded bits of the singleton.
+  getIslands(StartBits, EndBits, FieldVals, Insn);
+
+  unsigned Size = StartBits.size();
+  unsigned I, NumBits;
+
+  // If we have matched all the well-known bits, just issue a return.
+  if (Size == 0) {
+    o.indent(Indentation) << "return " << Opc << "; // " << nameWithID(Opc)
+                          << '\n';
+    return true;
+  }
+
+  // Otherwise, there are more decodings to be done!
+
+  // Emit code to match the island(s) for the singleton.
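+  //
+  // Sketch of the emitted code (illustrative only, with a hypothetical opcode
+  // number 1234 and the islands from the example above):
+  //
+  //   // Check Inst{20-20} && Inst{3-0} for singleton decoding...
+  //   if (fieldFromInstruction(insn, 20, 1) == 1 &&
+  //       fieldFromInstruction(insn, 0, 4) == 15)
+  //     return 1234; // <InstName>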
+  o.indent(Indentation) << "// Check "; + +  for (I = Size; I != 0; --I) { +    o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} "; +    if (I > 1) +      o << "&& "; +    else +      o << "for singleton decoding...\n"; +  } + +  o.indent(Indentation) << "if ("; + +  for (I = Size; I != 0; --I) { +    NumBits = EndBits[I-1] - StartBits[I-1] + 1; +    o << "fieldFromInstruction(insn, " << StartBits[I-1] << ", " << NumBits +      << ") == " << FieldVals[I-1]; +    if (I > 1) +      o << " && "; +    else +      o << ")\n"; +  } + +  o.indent(Indentation) << "  return " << Opc << "; // " << nameWithID(Opc) +                        << '\n'; + +  return false; +} + +// Emits code to decode the singleton, and then to decode the rest. +void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, +    Filter &Best) { + +  unsigned Opc = Best.getSingletonOpc(); + +  emitSingletonDecoder(o, Indentation, Opc); + +  // Emit code for the rest. +  o.indent(Indentation) << "else\n"; + +  Indentation += 2; +  Best.getVariableFC().emit(o, Indentation); +  Indentation -= 2; +} + +// Assign a single filter and run with it.  Top level API client can initialize +// with a single filter to start the filtering process. +void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit, +    unsigned numBit, bool mixed) { +  Filters.clear(); +  Filter F(*this, startBit, numBit, true); +  Filters.push_back(F); +  BestIndex = 0; // Sole Filter instance to choose from. +  bestFilter().recurse(); +} + +// reportRegion is a helper function for filterProcessor to mark a region as +// eligible for use as a filter region. +void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit, +    unsigned BitIndex, bool AllowMixed) { +  if (RA == ATTR_MIXED && AllowMixed) +    Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, true));    +  else if (RA == ATTR_ALL_SET && !AllowMixed) +    Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, false)); +} + +// FilterProcessor scans the well-known encoding bits of the instructions and +// builds up a list of candidate filters.  It chooses the best filter and +// recursively descends down the decoding tree. +bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { +  Filters.clear(); +  BestIndex = -1; +  unsigned numInstructions = Opcodes.size(); + +  assert(numInstructions && "Filter created with no instructions"); + +  // No further filtering is necessary. +  if (numInstructions == 1) +    return true; + +  // Heuristics.  See also doFilter()'s "Heuristics" comment when num of +  // instructions is 3. +  if (AllowMixed && !Greedy) { +    assert(numInstructions == 3); + +    for (unsigned i = 0; i < Opcodes.size(); ++i) { +      std::vector<unsigned> StartBits; +      std::vector<unsigned> EndBits; +      std::vector<uint64_t> FieldVals; +      insn_t Insn; + +      insnWithID(Insn, Opcodes[i]); + +      // Look for islands of undecoded bits of any instruction. +      if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) { +        // Found an instruction with island(s).  Now just assign a filter. +        runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1, +                        true); +        return true; +      } +    } +  } + +  unsigned BitIndex, InsnIndex; + +  // We maintain BIT_WIDTH copies of the bitAttrs automaton. +  // The automaton consumes the corresponding bit from each +  // instruction. +  // +  //   Input symbols: 0, 1, and _ (unset). 
+  //   States:        NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED. +  //   Initial state: NONE. +  // +  // (NONE) ------- [01] -> (ALL_SET) +  // (NONE) ------- _ ----> (ALL_UNSET) +  // (ALL_SET) ---- [01] -> (ALL_SET) +  // (ALL_SET) ---- _ ----> (MIXED) +  // (ALL_UNSET) -- [01] -> (MIXED) +  // (ALL_UNSET) -- _ ----> (ALL_UNSET) +  // (MIXED) ------ . ----> (MIXED) +  // (FILTERED)---- . ----> (FILTERED) + +  bitAttr_t bitAttrs[BIT_WIDTH]; + +  // FILTERED bit positions provide no entropy and are not worthy of pursuing. +  // Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position. +  for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex) +    if (FilterBitValues[BitIndex] == BIT_TRUE || +        FilterBitValues[BitIndex] == BIT_FALSE) +      bitAttrs[BitIndex] = ATTR_FILTERED; +    else +      bitAttrs[BitIndex] = ATTR_NONE; + +  for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { +    insn_t insn; + +    insnWithID(insn, Opcodes[InsnIndex]); + +    for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex) { +      switch (bitAttrs[BitIndex]) { +      case ATTR_NONE: +        if (insn[BitIndex] == BIT_UNSET) +          bitAttrs[BitIndex] = ATTR_ALL_UNSET; +        else +          bitAttrs[BitIndex] = ATTR_ALL_SET; +        break; +      case ATTR_ALL_SET: +        if (insn[BitIndex] == BIT_UNSET) +          bitAttrs[BitIndex] = ATTR_MIXED; +        break; +      case ATTR_ALL_UNSET: +        if (insn[BitIndex] != BIT_UNSET) +          bitAttrs[BitIndex] = ATTR_MIXED; +        break; +      case ATTR_MIXED: +      case ATTR_FILTERED: +        break; +      } +    } +  } + +  // The regionAttr automaton consumes the bitAttrs automatons' state, +  // lowest-to-highest. +  // +  //   Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed) +  //   States:        NONE, ALL_SET, MIXED +  //   Initial state: NONE +  // +  // (NONE) ----- F --> (NONE) +  // (NONE) ----- S --> (ALL_SET)     ; and set region start +  // (NONE) ----- U --> (NONE) +  // (NONE) ----- M --> (MIXED)       ; and set region start +  // (ALL_SET) -- F --> (NONE)        ; and report an ALL_SET region +  // (ALL_SET) -- S --> (ALL_SET) +  // (ALL_SET) -- U --> (NONE)        ; and report an ALL_SET region +  // (ALL_SET) -- M --> (MIXED)       ; and report an ALL_SET region +  // (MIXED) ---- F --> (NONE)        ; and report a MIXED region +  // (MIXED) ---- S --> (ALL_SET)     ; and report a MIXED region +  // (MIXED) ---- U --> (NONE)        ; and report a MIXED region +  // (MIXED) ---- M --> (MIXED) + +  bitAttr_t RA = ATTR_NONE; +  unsigned StartBit = 0; + +  for (BitIndex = 0; BitIndex < BIT_WIDTH; BitIndex++) { +    bitAttr_t bitAttr = bitAttrs[BitIndex]; + +    assert(bitAttr != ATTR_NONE && "Bit without attributes"); + +    switch (RA) { +    case ATTR_NONE: +      switch (bitAttr) { +      case ATTR_FILTERED: +        break; +      case ATTR_ALL_SET: +        StartBit = BitIndex; +        RA = ATTR_ALL_SET; +        break; +      case ATTR_ALL_UNSET: +        break; +      case ATTR_MIXED: +        StartBit = BitIndex; +        RA = ATTR_MIXED; +        break; +      default: +        assert(0 && "Unexpected bitAttr!"); +      } +      break; +    case ATTR_ALL_SET: +      switch (bitAttr) { +      case ATTR_FILTERED: +        reportRegion(RA, StartBit, BitIndex, AllowMixed); +        RA = ATTR_NONE; +        break; +      case ATTR_ALL_SET: +        break; +      case ATTR_ALL_UNSET: +        reportRegion(RA, StartBit, BitIndex, AllowMixed); +        RA = ATTR_NONE; +        break; + 
     case ATTR_MIXED: +        reportRegion(RA, StartBit, BitIndex, AllowMixed); +        StartBit = BitIndex; +        RA = ATTR_MIXED; +        break; +      default: +        assert(0 && "Unexpected bitAttr!"); +      } +      break; +    case ATTR_MIXED: +      switch (bitAttr) { +      case ATTR_FILTERED: +        reportRegion(RA, StartBit, BitIndex, AllowMixed); +        StartBit = BitIndex; +        RA = ATTR_NONE; +        break; +      case ATTR_ALL_SET: +        reportRegion(RA, StartBit, BitIndex, AllowMixed); +        StartBit = BitIndex; +        RA = ATTR_ALL_SET; +        break; +      case ATTR_ALL_UNSET: +        reportRegion(RA, StartBit, BitIndex, AllowMixed); +        RA = ATTR_NONE; +        break; +      case ATTR_MIXED: +        break; +      default: +        assert(0 && "Unexpected bitAttr!"); +      } +      break; +    case ATTR_ALL_UNSET: +      assert(0 && "regionAttr state machine has no ATTR_UNSET state"); +    case ATTR_FILTERED: +      assert(0 && "regionAttr state machine has no ATTR_FILTERED state"); +    } +  } + +  // At the end, if we're still in ALL_SET or MIXED states, report a region +  switch (RA) { +  case ATTR_NONE: +    break; +  case ATTR_FILTERED: +    break; +  case ATTR_ALL_SET: +    reportRegion(RA, StartBit, BitIndex, AllowMixed); +    break; +  case ATTR_ALL_UNSET: +    break; +  case ATTR_MIXED: +    reportRegion(RA, StartBit, BitIndex, AllowMixed); +    break; +  } + +  // We have finished with the filter processings.  Now it's time to choose +  // the best performing filter. +  BestIndex = 0; +  bool AllUseless = true; +  unsigned BestScore = 0; + +  for (unsigned i = 0, e = Filters.size(); i != e; ++i) { +    unsigned Usefulness = Filters[i].usefulness(); + +    if (Usefulness) +      AllUseless = false; + +    if (Usefulness > BestScore) { +      BestIndex = i; +      BestScore = Usefulness; +    } +  } + +  if (!AllUseless) +    bestFilter().recurse(); + +  return !AllUseless; +} // end of FilterChooser::filterProcessor(bool) + +// Decides on the best configuration of filter(s) to use in order to decode +// the instructions.  A conflict of instructions may occur, in which case we +// dump the conflict set to the standard error. +void FilterChooser::doFilter() { +  unsigned Num = Opcodes.size(); +  assert(Num && "FilterChooser created with no instructions"); + +  // Heuristics: Use Inst{31-28} as the top level filter for ARM ISA. +  if (TargetName == TARGET_ARM && Parent == NULL) { +    runSingleFilter(*this, 28, 4, false); +    return; +  } + +  // Try regions of consecutive known bit values first.  +  if (filterProcessor(false)) +    return; + +  // Then regions of mixed bits (both known and unitialized bit values allowed). +  if (filterProcessor(true)) +    return; + +  // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where +  // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a +  // well-known encoding pattern.  In such case, we backtrack and scan for the +  // the very first consecutive ATTR_ALL_SET region and assign a filter to it. +  if (Num == 3 && filterProcessor(true, false)) +    return; + +  // If we come to here, the instruction decoding has failed. +  // Print out the instructions in the conflict set... 
+  BestIndex = -1; + +  DEBUG({ +      errs() << "Conflict:\n"; + +      dumpStack(errs(), "\t\t"); + +      for (unsigned i = 0; i < Num; i++) { +        const std::string &Name = nameWithID(Opcodes[i]); + +        errs() << '\t' << Name << " "; +        dumpBits(errs(), +                 getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst")); +        errs() << '\n'; +      } +    }); +} + +// Emits code to decode our share of instructions.  Returns true if the +// emitted code causes a return, which occurs if we know how to decode +// the instruction at this level or the instruction is not decodeable. +bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) { +  if (Opcodes.size() == 1) +    // There is only one instruction in the set, which is great! +    // Call emitSingletonDecoder() to see whether there are any remaining +    // encodings bits. +    return emitSingletonDecoder(o, Indentation, Opcodes[0]); + +  // Choose the best filter to do the decodings! +  if (BestIndex != -1) { +    Filter &Best = bestFilter(); +    if (Best.getNumFiltered() == 1) +      emitSingletonDecoder(o, Indentation, Best); +    else +      bestFilter().emit(o, Indentation); +    return false; +  } + +  // If we reach here, there is a conflict in decoding.  Let's resolve the known +  // conflicts! +  if ((TargetName == TARGET_ARM || TargetName == TARGET_THUMB) && +      Opcodes.size() == 2) { +    // Resolve the known conflict sets: +    // +    // 1. source registers are identical => VMOVDneon; otherwise => VORRd +    // 2. source registers are identical => VMOVQ; otherwise => VORRq +    // 3. LDR, LDRcp => return LDR for now. +    // FIXME: How can we distinguish between LDR and LDRcp?  Do we need to? +    // 4. tLDM, tLDM_UPD => Rn = Inst{10-8}, reglist = Inst{7-0}, +    //    wback = registers<Rn> = 0 +    // NOTE: (tLDM, tLDM_UPD) resolution must come before Advanced SIMD +    //       addressing mode resolution!!! +    // 5. VLD[234]LN*/VST[234]LN* vs. VLD[234]LN*_UPD/VST[234]LN*_UPD conflicts +    //    are resolved returning the non-UPD versions of the instructions if the +    //    Rm field, i.e., Inst{3-0} is 0b1111.  This is specified in A7.7.1 +    //    Advanced SIMD addressing mode. +    const std::string &name1 = nameWithID(Opcodes[0]); +    const std::string &name2 = nameWithID(Opcodes[1]); +    if ((name1 == "VMOVDneon" && name2 == "VORRd") || +        (name1 == "VMOVQ" && name2 == "VORRq")) { +      // Inserting the opening curly brace for this case block. +      --Indentation; --Indentation; +      o.indent(Indentation) << "{\n"; +      ++Indentation; ++Indentation; + +      o.indent(Indentation) +        << "field_t N = fieldFromInstruction(insn, 7, 1), " +        << "M = fieldFromInstruction(insn, 5, 1);\n"; +      o.indent(Indentation) +        << "field_t Vn = fieldFromInstruction(insn, 16, 4), " +        << "Vm = fieldFromInstruction(insn, 0, 4);\n"; +      o.indent(Indentation) +        << "return (N == M && Vn == Vm) ? " +        << Opcodes[0] << " /* " << name1 << " */ : " +        << Opcodes[1] << " /* " << name2 << " */ ;\n"; + +      // Inserting the closing curly brace for this case block. 
+      --Indentation; --Indentation; +      o.indent(Indentation) << "}\n"; +      ++Indentation; ++Indentation; + +      return true; +    } +    if (name1 == "LDR" && name2 == "LDRcp") { +      o.indent(Indentation) +        << "return " << Opcodes[0] +        << "; // Returning LDR for {LDR, LDRcp}\n"; +      return true; +    } +    if (name1 == "tLDM" && name2 == "tLDM_UPD") { +      // Inserting the opening curly brace for this case block. +      --Indentation; --Indentation; +      o.indent(Indentation) << "{\n"; +      ++Indentation; ++Indentation; +       +      o.indent(Indentation) +        << "unsigned Rn = fieldFromInstruction(insn, 8, 3), " +        << "list = fieldFromInstruction(insn, 0, 8);\n"; +      o.indent(Indentation) +        << "return ((list >> Rn) & 1) == 0 ? " +        << Opcodes[1] << " /* " << name2 << " */ : " +        << Opcodes[0] << " /* " << name1 << " */ ;\n"; + +      // Inserting the closing curly brace for this case block. +      --Indentation; --Indentation; +      o.indent(Indentation) << "}\n"; +      ++Indentation; ++Indentation; + +      return true; +    } +    if (sameStringExceptSuffix(name1, name2, "_UPD")) { +      o.indent(Indentation) +        << "return fieldFromInstruction(insn, 0, 4) == 15 ? " << Opcodes[0] +        << " /* " << name1 << " */ : " << Opcodes[1] << "/* " << name2 +        << " */ ; // Advanced SIMD addressing mode\n"; +      return true; +    } + +    // Otherwise, it does not belong to the known conflict sets. +  } +  // We don't know how to decode these instructions!  Dump the conflict set! +  o.indent(Indentation) << "return 0;" << " // Conflict set: "; +  for (int i = 0, N = Opcodes.size(); i < N; ++i) { +    o << nameWithID(Opcodes[i]); +    if (i < (N - 1)) +      o << ", "; +    else +      o << '\n'; +  } +  return true; +} + + +//////////////////////////////////////////// +//                                        // +//  ARMDEBackend                          // +//  (Helper class for ARMDecoderEmitter)  // +//                                        // +//////////////////////////////////////////// + +class ARMDecoderEmitter::ARMDEBackend { +public: +  ARMDEBackend(ARMDecoderEmitter &frontend) : +    NumberedInstructions(), +    Opcodes(), +    Frontend(frontend), +    Target(), +    FC(NULL) +  { +    if (Target.getName() == "ARM") +      TargetName = TARGET_ARM; +    else { +      errs() << "Target name " << Target.getName() << " not recognized\n"; +      assert(0 && "Unknown target"); +    } + +    // Populate the instructions for our TargetName. +    populateInstructions(); +  } + +  ~ARMDEBackend() { +    if (FC) { +      delete FC; +      FC = NULL; +    } +  } + +  void getInstructionsByEnumValue(std::vector<const CodeGenInstruction*> +                                                &NumberedInstructions) { +    // We must emit the PHI opcode first... +    std::string Namespace = Target.getInstNamespace(); +    assert(!Namespace.empty() && "No instructions defined."); + +    NumberedInstructions = Target.getInstructionsByEnumValue(); +  } + +  bool populateInstruction(const CodeGenInstruction &CGI, TARGET_NAME_t TN); + +  void populateInstructions(); + +  // Emits disassembler code for instruction decoding.  This delegates to the +  // FilterChooser instance to do the heavy lifting. +  void emit(raw_ostream &o); + +protected: +  std::vector<const CodeGenInstruction*> NumberedInstructions; +  std::vector<unsigned> Opcodes; +  // Special case for the ARM chip, which supports ARM and Thumb ISAs. 
+  // Opcodes2 will be populated with the Thumb opcodes. +  std::vector<unsigned> Opcodes2; +  ARMDecoderEmitter &Frontend; +  CodeGenTarget Target; +  FilterChooser *FC; + +  TARGET_NAME_t TargetName; +}; + +bool ARMDecoderEmitter::ARMDEBackend::populateInstruction( +    const CodeGenInstruction &CGI, TARGET_NAME_t TN) { +  const Record &Def = *CGI.TheDef; +  const StringRef Name = Def.getName(); +  uint8_t Form = getByteField(Def, "Form"); + +  if (TN == TARGET_ARM) { +    // FIXME: what about Int_MemBarrierV6 and Int_SyncBarrierV6? +    if ((Name != "Int_MemBarrierV7" && Name != "Int_SyncBarrierV7") && +        Form == ARM_FORMAT_PSEUDO) +      return false; +    if (thumbInstruction(Form)) +      return false; +    if (Name.find("CMPz") != std::string::npos /* || +        Name.find("CMNz") != std::string::npos */) +      return false; + +    // Ignore pseudo instructions. +    if (Name == "BXr9" || Name == "BMOVPCRX" || Name == "BMOVPCRXr9") +      return false; + +    // VLDMQ/VSTMQ can be hanlded with the more generic VLDMD/VSTMD. +    if (Name == "VLDMQ" || Name == "VLDMQ_UPD" || +        Name == "VSTMQ" || Name == "VSTMQ_UPD") +      return false; + +    // +    // The following special cases are for conflict resolutions. +    // + +    // NEON NLdStFrm conflict resolutions: +    // +    // 1. Ignore suffix "odd" and "odd_UPD", prefer the "even" register- +    //    numbered ones which have the same Asm format string. +    // 2. Ignore VST2d64_UPD, which conflicts with VST1q64_UPD. +    // 3. Ignore VLD2d64_UPD, which conflicts with VLD1q64_UPD. +    // 4. Ignore VLD1q[_UPD], which conflicts with VLD1q64[_UPD]. +    // 5. Ignore VST1q[_UPD], which conflicts with VST1q64[_UPD]. +    if (Name.endswith("odd") || Name.endswith("odd_UPD") || +        Name == "VST2d64_UPD" || Name == "VLD2d64_UPD" || +        Name == "VLD1q" || Name == "VLD1q_UPD" || +        Name == "VST1q" || Name == "VST1q_UPD") +      return false; + +    // RSCSri and RSCSrs set the 's' bit, but are not predicated.  We are +    // better off using the generic RSCri and RSCrs instructions. +    if (Name == "RSCSri" || Name == "RSCSrs") return false; + +    // MOVCCr, MOVCCs, MOVCCi, FCYPScc, FCYPDcc, FNEGScc, and FNEGDcc are used +    // in the compiler to implement conditional moves.  We can ignore them in +    // favor of their more generic versions of instructions. +    // See also SDNode *ARMDAGToDAGISel::Select(SDValue Op). +    if (Name == "MOVCCr" || Name == "MOVCCs" || Name == "MOVCCi" || +        Name == "FCPYScc" || Name == "FCPYDcc" || +        Name == "FNEGScc" || Name == "FNEGDcc") +      return false; + +    // Ditto for VMOVDcc, VMOVScc, VNEGDcc, and VNEGScc. +    if (Name == "VMOVDcc" || Name == "VMOVScc" || Name == "VNEGDcc" || +        Name == "VNEGScc") +      return false; + +    // Ignore the *_sfp instructions when decoding.  They are used by the +    // compiler to implement scalar floating point operations using vector +    // operations in order to work around some performance issues. +    if (Name.find("_sfp") != std::string::npos) return false; + +    // LDM_RET is a special case of LDM (Load Multiple) where the registers +    // loaded include the PC, causing a branch to a loaded address.  Ignore +    // the LDM_RET instruction when decoding. +    if (Name == "LDM_RET") return false; + +    // Bcc is in a more generic form than B.  Ignore B when decoding. +    if (Name == "B") return false; + +    // Ignore the non-Darwin BL instructions and the TPsoft (TLS) instruction. 
+    if (Name == "BL" || Name == "BL_pred" || Name == "BLX" || Name == "BX" || +        Name == "TPsoft") +      return false; + +    // Ignore VDUPf[d|q] instructions known to conflict with VDUP32[d-q] for +    // decoding.  The instruction duplicates an element from an ARM core +    // register into every element of the destination vector.  There is no +    // distinction between data types. +    if (Name == "VDUPfd" || Name == "VDUPfq") return false; + +    // A8-598: VEXT +    // Vector Extract extracts elements from the bottom end of the second +    // operand vector and the top end of the first, concatenates them and +    // places the result in the destination vector.  The elements of the +    // vectors are treated as being 8-bit bitfields.  There is no distinction +    // between data types.  The size of the operation can be specified in +    // assembler as vext.size.  If the value is 16, 32, or 64, the syntax is +    // a pseudo-instruction for a VEXT instruction specifying the equivalent +    // number of bytes. +    // +    // Variants VEXTd16, VEXTd32, VEXTd8, and VEXTdf are reduced to VEXTd8; +    // variants VEXTq16, VEXTq32, VEXTq8, and VEXTqf are reduced to VEXTq8. +    if (Name == "VEXTd16" || Name == "VEXTd32" || Name == "VEXTdf" || +        Name == "VEXTq16" || Name == "VEXTq32" || Name == "VEXTqf") +      return false; + +    // Vector Reverse is similar to Vector Extract.  There is no distinction +    // between data types, other than size. +    // +    // VREV64df is equivalent to VREV64d32. +    // VREV64qf is equivalent to VREV64q32. +    if (Name == "VREV64df" || Name == "VREV64qf") return false; + +    // VDUPLNfd is equivalent to VDUPLN32d; VDUPfdf is specialized VDUPLN32d. +    // VDUPLNfq is equivalent to VDUPLN32q; VDUPfqf is specialized VDUPLN32q. +    // VLD1df is equivalent to VLD1d32. +    // VLD1qf is equivalent to VLD1q32. +    // VLD2d64 is equivalent to VLD1q64. +    // VST1df is equivalent to VST1d32. +    // VST1qf is equivalent to VST1q32. +    // VST2d64 is equivalent to VST1q64. +    if (Name == "VDUPLNfd" || Name == "VDUPfdf" || +        Name == "VDUPLNfq" || Name == "VDUPfqf" || +        Name == "VLD1df" || Name == "VLD1qf" || Name == "VLD2d64" || +        Name == "VST1df" || Name == "VST1qf" || Name == "VST2d64") +      return false; +  } else if (TN == TARGET_THUMB) { +    if (!thumbInstruction(Form)) +      return false; + +    // Ignore pseudo instructions. +    if (Name == "tInt_eh_sjlj_setjmp" || Name == "t2Int_eh_sjlj_setjmp" || +        Name == "t2MOVi32imm" || Name == "tBX" || Name == "tBXr9") +      return false; + +    // On Darwin R9 is call-clobbered.  Ignore the non-Darwin counterparts. +    if (Name == "tBL" || Name == "tBLXi" || Name == "tBLXr") +      return false; + +    // Ignore the TPsoft (TLS) instructions, which conflict with tBLr9. +    if (Name == "tTPsoft" || Name == "t2TPsoft") +      return false; + +    // Ignore tLEApcrel and tLEApcrelJT, prefer tADDrPCi. +    if (Name == "tLEApcrel" || Name == "tLEApcrelJT") +      return false; + +    // Ignore t2LEApcrel, prefer the generic t2ADD* for disassembly printing. +    if (Name == "t2LEApcrel") +      return false; + +    // Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr. +    // Ignore t2SUBrSPs, prefer the t2SUB[S]r[r|s]. +    // Ignore t2ADDrSPs, prefer the t2ADD[S]r[r|s]. 
+    if (Name == "tADDrSP" || Name == "tADDspr" || Name == "tPICADD" || +        Name == "t2SUBrSPs" || Name == "t2ADDrSPs") +      return false; + +    // Ignore t2LDRDpci, prefer the generic t2LDRDi8, t2LDRD_PRE, t2LDRD_POST. +    if (Name == "t2LDRDpci") +      return false; + +    // Ignore t2TBB, t2TBH and prefer the generic t2TBBgen, t2TBHgen. +    if (Name == "t2TBB" || Name == "t2TBH") +      return false; + +    // Resolve conflicts: +    // +    //   tBfar conflicts with tBLr9 +    //   tCMNz conflicts with tCMN (with assembly format strings being equal) +    //   tPOP_RET/t2LDM_RET conflict with tPOP/t2LDM (ditto) +    //   tMOVCCi conflicts with tMOVi8 +    //   tMOVCCr conflicts with tMOVgpr2gpr +    //   tBR_JTr conflicts with tBRIND +    //   tSpill conflicts with tSTRspi +    //   tLDRcp conflicts with tLDRspi +    //   tRestore conflicts with tLDRspi +    //   t2LEApcrelJT conflicts with t2LEApcrel +    //   t2ADDrSPi/t2SUBrSPi have more generic couterparts +    if (Name == "tBfar" || +        /* Name == "tCMNz" || */ Name == "tCMPzi8" || Name == "tCMPzr" || +        Name == "tCMPzhir" || /* Name == "t2CMNzrr" || Name == "t2CMNzrs" || +        Name == "t2CMNzri" || */ Name == "t2CMPzrr" || Name == "t2CMPzrs" || +        Name == "t2CMPzri" || Name == "tPOP_RET" || Name == "t2LDM_RET" || +        Name == "tMOVCCi" || Name == "tMOVCCr" || Name == "tBR_JTr" || +        Name == "tSpill" || Name == "tLDRcp" || Name == "tRestore" || +        Name == "t2LEApcrelJT" || Name == "t2ADDrSPi" || Name == "t2SUBrSPi") +      return false; +  } + +  // Dumps the instruction encoding format. +  switch (TargetName) { +  case TARGET_ARM: +  case TARGET_THUMB: +    DEBUG(errs() << Name << " " << stringForARMFormat((ARMFormat)Form)); +    break; +  } + +  DEBUG({ +      BitsInit &Bits = getBitsField(Def, "Inst"); + +      errs() << " "; + +      // Dumps the instruction encoding bits. +      dumpBits(errs(), Bits); + +      errs() << '\n'; + +      // Dumps the list of operand info. +      for (unsigned i = 0, e = CGI.OperandList.size(); i != e; ++i) { +        CodeGenInstruction::OperandInfo Info = CGI.OperandList[i]; +        const std::string &OperandName = Info.Name; +        const Record &OperandDef = *Info.Rec; + +        errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n"; +      } +    }); + +  return true; +} + +void ARMDecoderEmitter::ARMDEBackend::populateInstructions() { +  getInstructionsByEnumValue(NumberedInstructions); + +  uint16_t numUIDs = NumberedInstructions.size(); +  uint16_t uid; + +  const char *instClass = NULL; + +  switch (TargetName) { +  case TARGET_ARM: +    instClass = "InstARM"; +    break; +  default: +    assert(0 && "Unreachable code!"); +  } + +  for (uid = 0; uid < numUIDs; uid++) { +    // filter out intrinsics +    if (!NumberedInstructions[uid]->TheDef->isSubClassOf(instClass)) +      continue; + +    if (populateInstruction(*NumberedInstructions[uid], TargetName)) +      Opcodes.push_back(uid); +  } + +  // Special handling for the ARM chip, which supports two modes of execution. +  // This branch handles the Thumb opcodes. 
+  if (TargetName == TARGET_ARM) { +    for (uid = 0; uid < numUIDs; uid++) { +      // filter out intrinsics +      if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM") +          && !NumberedInstructions[uid]->TheDef->isSubClassOf("InstThumb")) +        continue; + +      if (populateInstruction(*NumberedInstructions[uid], TARGET_THUMB)) +        Opcodes2.push_back(uid); +    } +  } +} + +// Emits disassembler code for instruction decoding.  This delegates to the +// FilterChooser instance to do the heavy lifting. +void ARMDecoderEmitter::ARMDEBackend::emit(raw_ostream &o) { +  switch (TargetName) { +  case TARGET_ARM: +    Frontend.EmitSourceFileHeader("ARM/Thumb Decoders", o); +    break; +  default: +    assert(0 && "Unreachable code!"); +  } + +  o << "#include \"llvm/System/DataTypes.h\"\n"; +  o << "#include <assert.h>\n"; +  o << '\n'; +  o << "namespace llvm {\n\n"; + +  FilterChooser::setTargetName(TargetName); + +  switch (TargetName) { +  case TARGET_ARM: { +    // Emit common utility and ARM ISA decoder. +    FC = new FilterChooser(NumberedInstructions, Opcodes); +    // Reset indentation level. +    unsigned Indentation = 0; +    FC->emitTop(o, Indentation); +    delete FC; + +    // Emit Thumb ISA decoder as well. +    FilterChooser::setTargetName(TARGET_THUMB); +    FC = new FilterChooser(NumberedInstructions, Opcodes2); +    // Reset indentation level. +    Indentation = 0; +    FC->emitBot(o, Indentation); +    break; +  } +  default: +    assert(0 && "Unreachable code!"); +  } + +  o << "\n} // End llvm namespace \n"; +} + +///////////////////////// +//  Backend interface  // +///////////////////////// + +void ARMDecoderEmitter::initBackend() +{ +    Backend = new ARMDEBackend(*this); +} + +void ARMDecoderEmitter::run(raw_ostream &o) +{ +  Backend->emit(o); +} + +void ARMDecoderEmitter::shutdownBackend() +{ +  delete Backend; +  Backend = NULL; +} diff --git a/utils/TableGen/ARMDecoderEmitter.h b/utils/TableGen/ARMDecoderEmitter.h new file mode 100644 index 000000000000..107e0851ba1b --- /dev/null +++ b/utils/TableGen/ARMDecoderEmitter.h @@ -0,0 +1,50 @@ +//===------------ ARMDecoderEmitter.h - Decoder Generator -------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the ARM Disassembler. +// It contains the tablegen backend declaration ARMDecoderEmitter. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMDECODEREMITTER_H +#define ARMDECODEREMITTER_H + +#include "TableGenBackend.h" + +#include "llvm/System/DataTypes.h" + +namespace llvm { + +class ARMDecoderEmitter : public TableGenBackend { +  RecordKeeper &Records; +public: +  ARMDecoderEmitter(RecordKeeper &R) : Records(R) { +    initBackend(); +  } +     +  ~ARMDecoderEmitter() { +    shutdownBackend(); +  } +	 +  // run - Output the code emitter +  void run(raw_ostream &o); +     +private: +  // Helper class for ARMDecoderEmitter. 
+  class ARMDEBackend; + +  ARMDEBackend *Backend; +     +  void initBackend(); +  void shutdownBackend(); +}; + +} // end llvm namespace + +#endif diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index 881b50a01faf..f68159ab9081 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -1,4 +1,5 @@  add_executable(tblgen +  ARMDecoderEmitter.cpp    AsmMatcherEmitter.cpp    AsmWriterEmitter.cpp    AsmWriterInst.cpp diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp index a195c0b8d6dc..3284366c6dd8 100644 --- a/utils/TableGen/DisassemblerEmitter.cpp +++ b/utils/TableGen/DisassemblerEmitter.cpp @@ -12,6 +12,8 @@  #include "Record.h"  #include "X86DisassemblerTables.h"  #include "X86RecognizableInstr.h" +#include "ARMDecoderEmitter.h" +  using namespace llvm;  using namespace llvm::X86Disassembler; @@ -124,6 +126,12 @@ void DisassemblerEmitter::run(raw_ostream &OS) {      return;    } +  // Fixed-instruction-length targets use a common disassembler. +  if (Target.getName() == "ARM") { +    ARMDecoderEmitter(Records).run(OS); +    return; +  }   +    throw TGError(Target.getTargetRecord()->getLoc(),                  "Unable to generate disassembler for this target");  } diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 1326ebc023f1..1c66399ce8b9 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -31,6 +31,7 @@  #include "OptParserEmitter.h"  #include "Record.h"  #include "RegisterInfoEmitter.h" +#include "ARMDecoderEmitter.h"  #include "SubtargetEmitter.h"  #include "TGParser.h"  #include "llvm/Support/CommandLine.h" @@ -47,6 +48,7 @@ enum ActionType {    GenEmitter,    GenRegisterEnums, GenRegister, GenRegisterHeader,    GenInstrEnums, GenInstrs, GenAsmWriter, GenAsmMatcher, +  GenARMDecoder,    GenDisassembler,    GenCallingConv,    GenClangDiagsDefs, @@ -83,6 +85,8 @@ namespace {                                 "Generate calling convention descriptions"),                      clEnumValN(GenAsmWriter, "gen-asm-writer",                                 "Generate assembly writer"), +                    clEnumValN(GenARMDecoder, "gen-arm-decoder", +                               "Generate decoders for ARM/Thumb"),                      clEnumValN(GenDisassembler, "gen-disassembler",                                 "Generate disassembler"),                      clEnumValN(GenAsmMatcher, "gen-asm-matcher", @@ -228,6 +232,9 @@ int main(int argc, char **argv) {      case GenAsmWriter:        AsmWriterEmitter(Records).run(*Out);        break; +    case GenARMDecoder: +      ARMDecoderEmitter(Records).run(*Out); +      break;      case GenAsmMatcher:        AsmMatcherEmitter(Records).run(*Out);        break;  | 
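
A minimal usage sketch (not part of the patch): the generated file defines
fieldFromInstruction(), decodeInstruction() and, for Thumb, decodeThumbInstruction()
inside namespace llvm; each decoder returns the decoded opcode number, or 0 when
the word does not match.  The include file name and the driver around it are
assumptions for illustration only.

    #include <cstdio>
    #include <stdint.h>

    // Assumed name for the "tblgen -gen-arm-decoder" output; the generated file
    // pulls in its own dependencies and opens its own llvm namespace.
    #include "ARMGenDecoderTables.inc"

    int main() {
      uint32_t Insn = 0xE1A00000;                    // an arbitrary ARM instruction word
      uint16_t Opc = llvm::decodeInstruction(Insn);  // 0 means no match
      if (Opc == 0)
        std::printf("undecodable or conflicting encoding\n");
      else
        std::printf("decoded to instruction enum value %u\n", Opc);
      return 0;
    }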
