96 files changed, 3324 insertions, 1344 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7b5240c7952e..fbf8e2bb90e5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -128,10 +128,15 @@ if( LLVM_TARGETS_TO_BUILD STREQUAL "all" )
   set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} )
 endif()
 
+set(LLVM_TARGETS_TO_BUILD
+   ${LLVM_TARGETS_TO_BUILD}
+   ${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD})
+
 set(LLVM_ENUM_TARGETS "")
 foreach(c ${LLVM_TARGETS_TO_BUILD})
   list(FIND LLVM_ALL_TARGETS ${c} idx)
-  if( idx LESS 0 )
+  list(FIND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ${c} idy)
+  if( idx LESS 0 AND idy LESS 0 )
     message(FATAL_ERROR "The target `${c}' does not exist.
     It should be one of\n${LLVM_ALL_TARGETS}")
   else()
@@ -139,11 +144,6 @@ foreach(c ${LLVM_TARGETS_TO_BUILD})
   endif()
 endforeach(c)
 
-set(LLVM_TARGETS_TO_BUILD
-  ${LLVM_TARGETS_TO_BUILD}
-  ${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}
-  )
-
 set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
 
 include(AddLLVMDefinitions)
diff --git a/Makefile b/Makefile
index ec24862ad6fc..604696a1df43 100644
--- a/Makefile
+++ b/Makefile
@@ -244,13 +244,13 @@ build-for-llvm-top:
 SVN = svn
 SVN-UPDATE-OPTIONS =
 AWK = awk
-SUB-SVN-DIRS = $(AWK) '/\?\ \ \ \ \ \ / {print $$2}'   \
+SUB-SVN-DIRS = $(AWK) '/I|\?      / {print $$2}'   \
 		| LC_ALL=C xargs $(SVN) info 2>/dev/null \
 		| $(AWK) '/^Path:\ / {print $$2}'
 
 update:
 	$(SVN) $(SVN-UPDATE-OPTIONS) update $(LLVM_SRC_ROOT)
-	@ $(SVN) status $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update
+	@ $(SVN) status --no-ignore $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update
 
 happiness: update all check-all
 
diff --git a/Makefile.config.in b/Makefile.config.in
index 6d06f16b3575..e3bd2a207a50 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -258,6 +258,11 @@ ENABLE_WERROR = @ENABLE_WERROR@
 #DEBUG_SYMBOLS = 1
 @DEBUG_SYMBOLS@
 
+# When KEEP_SYMBOLS is enabled, installed executables will never have their
+# symbols stripped.
+#KEEP_SYMBOLS = 1
+@KEEP_SYMBOLS@
+
 # The compiler flags to use for optimized builds.
 OPTIMIZE_OPTION := @OPTIMIZE_OPTION@
 
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 4dde3a6dfa39..7fa883e9cc50 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -542,6 +542,15 @@ else
   AC_SUBST(DEBUG_SYMBOLS,[[DEBUG_SYMBOLS=1]])
 fi
 
+dnl --enable-keep-symbols : do not strip installed executables
+AC_ARG_ENABLE(keep-symbols,
+   AS_HELP_STRING(--enable-keep-symbols,[Do not strip installed executables]),,enableval=no)
+if test ${enableval} = "no" ; then
+  AC_SUBST(KEEP_SYMBOLS,[[]])
+else
+  AC_SUBST(KEEP_SYMBOLS,[[KEEP_SYMBOLS=1]])
+fi
+
 dnl --enable-jit: check whether they want to enable the jit
 AC_ARG_ENABLE(jit,
   AS_HELP_STRING(--enable-jit,
diff --git a/configure b/configure
index 62a6478e0af0..6fbc47c72a7e 100755
--- a/configure
+++ b/configure
@@ -693,6 +693,7 @@ ENABLE_EXPENSIVE_CHECKS
 EXPENSIVE_CHECKS
 DEBUG_RUNTIME
 DEBUG_SYMBOLS
+KEEP_SYMBOLS
 JIT
 TARGET_HAS_JIT
 ENABLE_DOCS
@@ -1408,6 +1409,7 @@ Optional Features:
                           NO)
   --enable-debug-symbols  Build compiler with debug symbols (default is NO if
                           optimization is on and YES if it's off)
+  --enable-keep-symbols   Do not strip installed executables
   --enable-jit            Enable Just In Time Compiling (default is YES)
   --enable-docs           Build documents (default is YES)
   --enable-doxygen        Build doxygen documentation (default is NO)
@@ -5158,6 +5160,21 @@ else
 
 fi
 
+# Check whether --enable-keep-symbols was given.
+if test "${enable_keep_symbols+set}" = set; then
+  enableval=$enable_keep_symbols;
+else
+  enableval=no
+fi
+
+if test ${enableval} = "no" ; then
+  KEEP_SYMBOLS=
+
+else
+  KEEP_SYMBOLS=KEEP_SYMBOLS=1
+
+fi
+
 # Check whether --enable-jit was given.
 if test "${enable_jit+set}" = set; then
   enableval=$enable_jit;
@@ -10272,7 +10289,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10275 "configure"
+#line 10292 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -22150,12 +22167,12 @@ ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
 EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
 DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
 DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
+KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
 JIT!$JIT$ac_delim
 TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
 ENABLE_DOCS!$ENABLE_DOCS$ac_delim
 ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
 LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim
-ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
 _ACEOF
 
   if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -22197,6 +22214,7 @@ _ACEOF
 ac_delim='%!_!# '
 for ac_last_try in false false false false false :; do
   cat >conf$$subs.sed <<_ACEOF
+ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
 ENABLE_PIC!$ENABLE_PIC$ac_delim
 ENABLE_SHARED!$ENABLE_SHARED$ac_delim
 ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
@@ -22293,7 +22311,7 @@ LIBOBJS!$LIBOBJS$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF
 
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 94; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 65a70fbfa6d0..f60d688c0dce 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -687,8 +687,7 @@ class SmallDenseMap
 
   /// A "union" of an inline bucket array and the struct representing
   /// a large bucket. This union will be discriminated by the 'Small' bit.
-  typename AlignedCharArray<BucketT[InlineBuckets], LargeRep>::union_type
-    storage;
+  AlignedCharArrayUnion<BucketT[InlineBuckets], LargeRep> storage;
 
 public:
   explicit SmallDenseMap(unsigned NumInitBuckets = 0) {
@@ -834,8 +833,7 @@ public:
         return; // Nothing to do.
 
       // First move the inline buckets into a temporary storage.
-      typename AlignedCharArray<BucketT[InlineBuckets]>::union_type
-        TmpStorage;
+      AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage;
       BucketT *TmpBegin = reinterpret_cast<BucketT *>(TmpStorage.buffer);
       BucketT *TmpEnd = TmpBegin;
 
diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h
index a9a0dc6b6e20..a7f83a6bca9d 100644
--- a/include/llvm/ADT/VariadicFunction.h
+++ b/include/llvm/ADT/VariadicFunction.h
@@ -206,7 +206,7 @@ struct VariadicFunction2 {
   ResultT operator()(Param0T P0, Param1T P1, \
                      LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
     const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
-    return Func(P0, P1, makeAraryRef(Args)); \
+    return Func(P0, P1, makeArrayRef(Args)); \
   }
   LLVM_DEFINE_OVERLOAD(1)
   LLVM_DEFINE_OVERLOAD(2)
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 2ced7967ed5b..006daa082946 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -122,6 +122,7 @@ private:
   bool calcLoopBranchHeuristics(BasicBlock *BB);
   bool calcZeroHeuristics(BasicBlock *BB);
   bool calcFloatingPointHeuristics(BasicBlock *BB);
+  bool calcInvokeHeuristics(BasicBlock *BB);
 };
 
 }
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 1289eddf51b9..a1cc196eae30 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -705,7 +705,20 @@ DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A, const NodeT *B) {
 
 EXTERN_TEMPLATE_INSTANTIATION(class DominatorTreeBase<BasicBlock>);
 
-class BasicBlockEdge;
+class BasicBlockEdge {
+  const BasicBlock *Start;
+  const BasicBlock *End;
+public:
+  BasicBlockEdge(const BasicBlock *Start_, const BasicBlock *End_) :
+    Start(Start_), End(End_) { }
+  const BasicBlock *getStart() const {
+    return Start;
+  }
+  const BasicBlock *getEnd() const {
+    return End;
+  }
+  bool isSingleEdge() const;
+};
 
 //===-------------------------------------
 /// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 4c5eb8b36f36..27756abf3f54 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -420,6 +420,12 @@ public:
     return hasProperty(MCID::Bitcast, Type);
   }
 
+  /// isSelect - Return true if this instruction is a select instruction.
+  ///
+  bool isSelect(QueryType Type = IgnoreBundle) const {
+    return hasProperty(MCID::Select, Type);
+  }
+
   /// isNotDuplicable - Return true if this instruction cannot be safely
   /// duplicated.  For example, if the instruction has a unique labels attached
   /// to it, duplicating it would cause multiple definition errors.
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 0dfb394d04a7..db361ee9b1bc 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -146,7 +146,8 @@ public:
   inline bool isMachineOpcode() const;
   inline unsigned getMachineOpcode() const;
   inline const DebugLoc getDebugLoc() const;
-
+  inline void dump() const;
+  inline void dumpr() const;
 
   /// reachesChainWithoutSideEffects - Return true if this operand (which must
   /// be a chain) reaches the specified operand without crossing any
@@ -806,7 +807,12 @@ inline bool SDValue::hasOneUse() const {
 inline const DebugLoc SDValue::getDebugLoc() const {
   return Node->getDebugLoc();
 }
-
+inline void SDValue::dump() const {
+  return Node->dump();
+}
+inline void SDValue::dumpr() const {
+  return Node->dumpr();
+}
 // Define inline functions from the SDUse class.
 
 inline void SDUse::set(const SDValue &V) {
diff --git a/include/llvm/IntrinsicsHexagon.td b/include/llvm/IntrinsicsHexagon.td
index efd04f309ae3..8a8872931f36 100644
--- a/include/llvm/IntrinsicsHexagon.td
+++ b/include/llvm/IntrinsicsHexagon.td
@@ -15,7 +15,7 @@
 //
 // All Hexagon intrinsics start with "llvm.hexagon.".
 let TargetPrefix = "hexagon" in {
-  /// Hexagon_Intrinsic - Base class for all altivec intrinsics.
+  /// Hexagon_Intrinsic - Base class for all Hexagon intrinsics.
   class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
                               list<LLVMType> param_types,
                               list<IntrinsicProperty> properties>
diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
new file mode 100644
index 000000000000..22b3c32abde9
--- /dev/null
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -0,0 +1,32 @@
+//===-- llvm/MC/MCFixedLenDisassembler.h - Decoder driver -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Fixed length disassembler decoder state machine driver.
+//===----------------------------------------------------------------------===//
+#ifndef MCFIXEDLENDISASSEMBLER_H
+#define MCFIXEDLENDISASSEMBLER_H
+
+namespace llvm {
+
+namespace MCD {
+// Disassembler state machine opcodes.
+enum DecoderOps {
+  OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len)
+  OPC_FilterValue,      // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip)
+  OPC_CheckField,       // OPC_CheckField(uint8_t Start, uint8_t Len,
+                        //                uleb128 Val, uint16_t NumToSkip)
+  OPC_CheckPredicate,   // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
+  OPC_Decode,           // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
+  OPC_SoftFail,         // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
+  OPC_Fail              // OPC_Fail()
+};
+
+} // namespace MCD
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index 186612d904d8..dbf16d870050 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -107,6 +107,7 @@ namespace MCID {
     Compare,
     MoveImm,
     Bitcast,
+    Select,
     DelaySlot,
     FoldableAsLoad,
     MayLoad,
@@ -282,6 +283,12 @@ public:
     return Flags & (1 << MCID::Bitcast);
   }
 
+  /// isSelect - Return true if this is a select instruction.
+  ///
+  bool isSelect() const {
+    return Flags & (1 << MCID::Select);
+  }
+
   /// isNotDuplicable - Return true if this instruction cannot be safely
   /// duplicated.  For example, if the instruction has a unique labels attached
   /// to it, duplicating it would cause multiple definition errors.
diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h
index 85607c84482c..cf7125173ee1 100644
--- a/include/llvm/Support/AlignOf.h
+++ b/include/llvm/Support/AlignOf.h
@@ -107,8 +107,8 @@ LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192);
 // Any larger and MSVC complains.
 #undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT
 
-/// \brief This class template exposes a typedef for type containing a suitable
-/// aligned character array to hold elements of any of up to four types.
+/// \brief This union template exposes a suitably aligned and sized character
+/// array member which can hold elements of any of up to four types.
 ///
 /// These types may be arrays, structs, or any other types. The goal is to
 /// produce a union type containing a character array which, when used, forms
@@ -116,7 +116,8 @@ LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192);
 /// than four types can be added at the cost of more boiler plate.
 template <typename T1,
           typename T2 = char, typename T3 = char, typename T4 = char>
-class AlignedCharArray {
+union AlignedCharArrayUnion {
+private:
   class AlignerImpl {
     T1 t1; T2 t2; T3 t3; T4 t4;
 
@@ -127,6 +128,12 @@ class AlignedCharArray {
   };
 
 public:
+  /// \brief The character array buffer for use by clients.
+  ///
+  /// No other member of this union should be referenced. They exist purely to
+  /// constrain the layout of this character array.
+  char buffer[sizeof(SizerImpl)];
+
   // Sadly, Clang and GCC both fail to align a character array properly even
   // with an explicit alignment attribute. To work around this, we union
   // the character array that will actually be used with a struct that contains
@@ -134,16 +141,10 @@ public:
   // and GCC will properly register the alignment of a struct containing an
   // aligned member, and this alignment should carry over to the character
   // array in the union.
-  union union_type {
-    // This is the only member of the union which should be used by clients:
-    char buffer[sizeof(SizerImpl)];
-
-    // This member of the union only exists to force the alignment.
-    struct {
-      typename llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment>::type
-        nonce_inner_member;
-    } nonce_member;
-  };
+  struct {
+    typename llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment>::type
+      nonce_inner_member;
+  } nonce_member;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 6c2ee08756e7..ba8adb018173 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -50,7 +50,7 @@ namespace COFF {
   };
 
   enum MachineTypes {
-    MT_Invalid = -1,
+    MT_Invalid = 0xffff,
 
     IMAGE_FILE_MACHINE_UNKNOWN   = 0x0,
     IMAGE_FILE_MACHINE_AM33      = 0x13,
@@ -142,7 +142,7 @@ namespace COFF {
 
   /// Storage class tells where and what the symbol represents
   enum SymbolStorageClass {
-    SSC_Invalid = -1,
+    SSC_Invalid = 0xff,
 
     IMAGE_SYM_CLASS_END_OF_FUNCTION  = -1,  ///< Physical end of function
     IMAGE_SYM_CLASS_NULL             = 0,   ///< No symbol
@@ -220,7 +220,7 @@ namespace COFF {
   };
 
   enum SectionCharacteristics {
-    SC_Invalid = -1,
+    SC_Invalid = 0xffffffff,
 
     IMAGE_SCN_TYPE_NO_PAD            = 0x00000008,
     IMAGE_SCN_CNT_CODE               = 0x00000020,
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index f654f327a923..4469ae31de09 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -38,6 +38,25 @@
 #define llvm_move(value) (value)
 #endif
 
+/// LLVM_DELETED_FUNCTION - Expands to = delete if the compiler supports it.
+/// Use to mark functions as uncallable. Member functions with this should
+/// be declared private so that some behavior is kept in C++03 mode.
+///
+/// class DontCopy {
+/// private:
+///   DontCopy(const DontCopy&) LLVM_DELETED_FUNCTION;
+///   DontCopy &operator =(const DontCopy&) LLVM_DELETED_FUNCTION;
+/// public:
+///   ...
+/// };
+#if (__has_feature(cxx_deleted_functions) \
+     || defined(__GXX_EXPERIMENTAL_CXX0X__))
+     // No version of MSVC currently supports this.
+#define LLVM_DELETED_FUNCTION = delete
+#else
+#define LLVM_DELETED_FUNCTION
+#endif
+
 /// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked
 /// into a shared library, then the class should be private to the library and
 /// not accessible from outside it.  Can also be used to mark variables and
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index e0353f957a7c..f4a9aa0e8998 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -28,6 +28,7 @@
 #define LLVM_SUPPORT_FILE_SYSTEM_H
 
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/DataTypes.h"
@@ -576,6 +577,82 @@ error_code FindLibrary(const Twine &short_name, SmallVectorImpl<char> &result);
 error_code GetMainExecutable(const char *argv0, void *MainAddr,
                              SmallVectorImpl<char> &result);
 
+/// This class represents a memory mapped file. It is based on
+/// boost::iostreams::mapped_file.
+class mapped_file_region {
+  mapped_file_region() LLVM_DELETED_FUNCTION;
+  mapped_file_region(mapped_file_region&) LLVM_DELETED_FUNCTION;
+  mapped_file_region &operator =(mapped_file_region&) LLVM_DELETED_FUNCTION;
+
+public:
+  enum mapmode {
+    readonly, ///< May only access map via const_data as read only.
+    readwrite, ///< May access map via data and modify it. Written to path.
+    priv ///< May modify via data, but changes are lost on destruction.
+  };
+
+private:
+  /// Platform specific mapping state.
+  mapmode Mode;
+  uint64_t Size;
+  void *Mapping;
+#if LLVM_ON_WIN32
+  int FileDescriptor;
+  void *FileHandle;
+  void *FileMappingHandle;
+#endif
+
+  error_code init(int FD, uint64_t Offset);
+
+public:
+  typedef char char_type;
+
+#if LLVM_USE_RVALUE_REFERENCES
+  mapped_file_region(mapped_file_region&&);
+  mapped_file_region &operator =(mapped_file_region&&);
+#endif
+
+  /// Construct a mapped_file_region at \a path starting at \a offset of length
+  /// \a length and with access \a mode.
+  ///
+  /// \param path Path to the file to map. If it does not exist it will be
+  ///             created.
+  /// \param mode How to map the memory.
+  /// \param length Number of bytes to map in starting at \a offset. If the file
+  ///               is shorter than this, it will be extended. If \a length is
+  ///               0, the entire file will be mapped.
+  /// \param offset Byte offset from the beginning of the file where the map
+  ///               should begin. Must be a multiple of
+  ///               mapped_file_region::alignment().
+  /// \param ec This is set to errc::success if the map was constructed
+  ///           successfully. Otherwise it is set to a platform dependent error.
+  mapped_file_region(const Twine &path,
+                     mapmode mode,
+                     uint64_t length,
+                     uint64_t offset,
+                     error_code &ec);
+
+  /// \param fd An open file descriptor to map. mapped_file_region takes
+  ///           ownership. It must have been opened in the correct mode.
+  mapped_file_region(int fd,
+                     mapmode mode,
+                     uint64_t length,
+                     uint64_t offset,
+                     error_code &ec);
+
+  ~mapped_file_region();
+
+  mapmode flags() const;
+  uint64_t size() const;
+  char *data() const;
+
+  /// Get a const view of the data. Modifying this memory has undefined
+  /// behavior.
+  const char *const_data() const;
+
+  /// \returns The alignment that a mapping offset must be a multiple of.
+  static int alignment();
+};
 
 /// @brief Memory maps the contents of a file
 ///
diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h
index 00c7eeaebcbb..410edd4dc740 100644
--- a/include/llvm/Support/LEB128.h
+++ b/include/llvm/Support/LEB128.h
@@ -19,7 +19,7 @@
 
 namespace llvm {
 
-/// Utility function to encode a SLEB128 value.
+/// Utility function to encode a SLEB128 value to an output stream.
 static inline void encodeSLEB128(int64_t Value, raw_ostream &OS) {
   bool More;
   do {
@@ -34,7 +34,7 @@ static inline void encodeSLEB128(int64_t Value, raw_ostream &OS) {
   } while (More);
 }
 
-/// Utility function to encode a ULEB128 value.
+/// Utility function to encode a ULEB128 value to an output stream.
 static inline void encodeULEB128(uint64_t Value, raw_ostream &OS,
                                  unsigned Padding = 0) {
   do {
@@ -53,6 +53,43 @@ static inline void encodeULEB128(uint64_t Value, raw_ostream &OS,
   }
 }
 
+/// Utility function to encode a ULEB128 value to a buffer. Returns
+/// the length in bytes of the encoded value.
+static inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,
+                                     unsigned Padding = 0) {
+  uint8_t *orig_p = p;
+  do {
+    uint8_t Byte = Value & 0x7f;
+    Value >>= 7;
+    if (Value != 0 || Padding != 0)
+      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+    *p++ = Byte;
+  } while (Value != 0);
+
+  // Pad with 0x80 and emit a null byte at the end.
+  if (Padding != 0) {
+    for (; Padding != 1; --Padding)
+      *p++ = '\x80';
+    *p++ = '\x00';
+  }
+  return (unsigned)(p - orig_p);
+}
+
+/// Utility function to decode a ULEB128 value. If \p n is non-null, it
+/// receives the number of bytes that were read from \p p.
+static inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = 0) {
+  const uint8_t *orig_p = p;
+  uint64_t Value = 0;
+  unsigned Shift = 0;
+  do {
+    Value += uint64_t(*p & 0x7f) << Shift; // Widen first; Shift can be >= 32.
+    Shift += 7;
+  } while (*p++ >= 128);
+  if (n)
+    *n = (unsigned)(p - orig_p);
+  return Value;
+}
+
 }  // namespace llvm
 
 #endif  // LLVM_SYSTEM_LEB128_H
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index 75c1a79265e2..8e41a64b1770 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -181,6 +181,12 @@ public:
                                 ArrayRef<Constant *> IdxList) const {
     return ConstantExpr::getGetElementPtr(C, IdxList);
   }
+  Constant *CreateGetElementPtr(Constant *C, Constant *Idx) const {
+    // This form of the function only exists to avoid ambiguous overload
+    // warnings about whether to convert Idx to ArrayRef<Constant *> or
+    // ArrayRef<Value *>.
+    return ConstantExpr::getGetElementPtr(C, Idx);
+  }
   Instruction *CreateGetElementPtr(Constant *C,
                                    ArrayRef<Value *> IdxList) const {
     return GetElementPtrInst::Create(C, IdxList);
@@ -190,6 +196,12 @@ public:
                                         ArrayRef<Constant *> IdxList) const {
     return ConstantExpr::getInBoundsGetElementPtr(C, IdxList);
   }
+  Constant *CreateInBoundsGetElementPtr(Constant *C, Constant *Idx) const {
+    // This form of the function only exists to avoid ambiguous overload
+    // warnings about whether to convert Idx to ArrayRef<Constant *> or
+    // ArrayRef<Value *>.
+    return ConstantExpr::getInBoundsGetElementPtr(C, Idx);
+  }
   Instruction *CreateInBoundsGetElementPtr(Constant *C,
                                            ArrayRef<Value *> IdxList) const {
     return GetElementPtrInst::CreateInBounds(C, IdxList);
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 24be2b10416c..1816445579ed 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -28,6 +28,24 @@ class SubRegIndex<list<SubRegIndex> comps = []> {
   // ComposedOf - A list of two SubRegIndex instances, [A, B].
   // This indicates that this SubRegIndex is the result of composing A and B.
   list<SubRegIndex> ComposedOf = comps;
+
+  // CoveringSubRegIndices - A list of two or more sub-register indexes that
+  // cover this sub-register.
+  //
+  // This field should normally be left blank as TableGen can infer it.
+  //
+  // TableGen automatically detects sub-registers that straddle the registers
+  // in the SubRegs field of a Register definition. For example:
+  //
+  //   Q0    = dsub_0 -> D0, dsub_1 -> D1
+  //   Q1    = dsub_0 -> D2, dsub_1 -> D3
+  //   D1_D2 = dsub_0 -> D1, dsub_1 -> D2
+  //   QQ0   = qsub_0 -> Q0, qsub_1 -> Q1
+  //
+  // TableGen will infer that D1_D2 is a sub-register of QQ0. It will be given
+  // the synthetic index dsub_1_dsub_2 unless some SubRegIndex is defined with
+  // CoveringSubRegIndices = [dsub_1, dsub_2].
+  list<SubRegIndex> CoveringSubRegIndices = [];
 }
 
 // RegAltNameIndex - The alternate name set to use for register operands of
@@ -321,6 +339,7 @@ class Instruction {
   bit isCompare    = 0;     // Is this instruction a comparison instruction?
   bit isMoveImm    = 0;     // Is this instruction a move immediate instruction?
   bit isBitcast    = 0;     // Is this instruction a bitcast instruction?
+  bit isSelect     = 0;     // Is this instruction a select instruction?
   bit isBarrier    = 0;     // Can control flow fall through this instruction?
   bit isCall       = 0;     // Is this instruction a call instruction?
   bit canFoldAsLoad = 0;    // Can this be folded as a simple memory operand?
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index a18b0305a94b..da30ab82d6c2 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -413,6 +413,51 @@ public:
     llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!");
   }
 
+  /// analyzeSelect - Analyze the given select instruction, returning true if
+  /// it cannot be understood. It is assumed that MI->isSelect() is true.
+  ///
+  /// When successful, return the controlling condition and the operands that
+  /// determine the true and false result values.
+  ///
+  ///   Result = SELECT Cond, TrueOp, FalseOp
+  ///
+  /// Some targets can optimize select instructions, for example by predicating
+  /// the instruction defining one of the operands. Such targets should set
+  /// Optimizable.
+  ///
+  /// @param         MI Select instruction to analyze.
+  /// @param Cond    Condition controlling the select.
+  /// @param TrueOp  Operand number of the value selected when Cond is true.
+  /// @param FalseOp Operand number of the value selected when Cond is false.
+  /// @param Optimizable Returned as true if MI is optimizable.
+  /// @returns False on success.
+  virtual bool analyzeSelect(const MachineInstr *MI,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             unsigned &TrueOp, unsigned &FalseOp,
+                             bool &Optimizable) const {
+    assert(MI && MI->isSelect() && "MI must be a select instruction");
+    return true;
+  }
+
+  /// optimizeSelect - Given a select instruction that was understood by
+  /// analyzeSelect and returned Optimizable = true, attempt to optimize MI by
+  /// merging it with one of its operands. Returns NULL on failure.
+  ///
+  /// When successful, returns the new select instruction. The client is
+  /// responsible for deleting MI.
+  ///
+  /// If both sides of the select can be optimized, PreferFalse is used to pick
+  /// a side.
+  ///
+  /// @param MI          Optimizable select instruction.
+  /// @param PreferFalse Try to optimize FalseOp instead of TrueOp.
+  /// @returns Optimized instruction or NULL.
+  virtual MachineInstr *optimizeSelect(MachineInstr *MI,
+                                       bool PreferFalse = false) const {
+    // This function must be implemented if Optimizable is ever set.
+    llvm_unreachable("Target must implement TargetInstrInfo::optimizeSelect!");
+  }
+
   /// copyPhysReg - Emit instructions to copy a pair of physical registers.
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, DebugLoc DL,
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 2730ce6c63bf..b255ce6dba51 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1,4 +1,4 @@
-//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===//
+//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -78,6 +78,19 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
 static const uint32_t FPH_TAKEN_WEIGHT = 20;
 static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
 
+/// \brief Invoke-terminating normal branch taken weight
+///
+/// This is the weight for branching to the normal destination of an invoke
+/// instruction. We expect this to happen most of the time. Set the weight to an
+/// absurdly high value so that nested loops subsume it.
+static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
+
+/// \brief Invoke-terminating normal branch not-taken weight.
+///
+/// This is the weight for branching to the unwind destination of an invoke
+/// instruction. This is essentially never taken.
+static const uint32_t IH_NONTAKEN_WEIGHT = 1;
+
 // Standard weight value. Used when none of the heuristics set weight for
 // the edge.
 static const uint32_t NORMAL_WEIGHT = 16;
@@ -371,6 +384,19 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
   return true;
 }
 
+bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {
+  InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator());
+  if (!II)
+    return false;
+
+  BasicBlock *Normal = II->getNormalDest();
+  BasicBlock *Unwind = II->getUnwindDest();
+
+  setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT);
+  setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT);
+  return true;
+}
+
 void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LoopInfo>();
   AU.setPreservesAll();
@@ -397,7 +423,9 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
       continue;
     if (calcZeroHeuristics(*I))
       continue;
-    calcFloatingPointHeuristics(*I);
+    if (calcFloatingPointHeuristics(*I))
+      continue;
+    calcInvokeHeuristics(*I);
   }
 
   PostDominatedByUnreachable.clear();
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index c0cc27b6ec5e..e77d2ff9e44e 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -473,6 +473,10 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
+  // Ignore self-referencing GEPs, they can occur in unreachable code.
+  if (&GEP == GEP.getPointerOperand())
+    return unknown();
+
   SizeOffsetType PtrData = compute(GEP.getPointerOperand());
   if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices())
     return unknown();
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 172402e20d64..f745b41c16fe 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -681,10 +681,10 @@ void
 MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
   const MachineInstr *MI = MO->getParent();
   const MCInstrDesc &MCID = MI->getDesc();
-  const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
 
   // The first MCID.NumDefs operands must be explicit register defines
   if (MONum < MCID.getNumDefs()) {
+    const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
     if (!MO->isReg())
       report("Explicit definition must be a register", MO, MONum);
     else if (!MO->isDef() && !MCOI.isOptionalDef())
@@ -692,6 +692,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
     else if (MO->isImplicit())
       report("Explicit definition marked as implicit", MO, MONum);
   } else if (MONum < MCID.getNumOperands()) {
+    const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
     // Don't check if it's the last operand in a variadic instruction. See,
     // e.g., LDM_RET in the arm back end.
     if (MO->isReg() &&
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 6bc7e37e3d87..9099862bd312 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -79,6 +79,7 @@ STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
 STATISTIC(NumCmps,       "Number of compares eliminated");
 STATISTIC(NumImmFold,    "Number of move immediate folded");
 STATISTIC(NumLoadFold,   "Number of loads folded");
+STATISTIC(NumSelects,    "Number of selects optimized");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -109,6 +110,7 @@ namespace {
     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+    bool optimizeSelect(MachineInstr *MI);
     bool isMoveImmediate(MachineInstr *MI,
                          SmallSet<unsigned, 4> &ImmDefRegs,
                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
@@ -386,6 +388,23 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
   return false;
 }
 
+/// Ask the target to analyze and rewrite the select MI. On success MI is
+/// erased, NumSelects is bumped and true is returned; otherwise MI is kept.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+  SmallVector<MachineOperand, 4> CondOps;
+  unsigned TrueIdx = 0, FalseIdx = 0;
+  bool CanOptimize = false;
+  // A true result from analyzeSelect is treated as "cannot handle MI".
+  if (TII->analyzeSelect(MI, CondOps, TrueIdx, FalseIdx, CanOptimize) ||
+      !CanOptimize || !TII->optimizeSelect(MI))
+    return false;
+
+  // The target hook succeeded; remove the original select.
+  MI->eraseFromParent();
+  ++NumSelects;
+  return true;
+}
+
 /// isLoadFoldable - Check whether MI is a candidate for folding into a later
 /// instruction. We only fold loads to virtual registers and the virtual
 /// register defined has a single use.
@@ -477,11 +496,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
     ImmDefMIs.clear();
     FoldAsLoadDefReg = 0;
 
-    bool First = true;
-    MachineBasicBlock::iterator PMII;
     for (MachineBasicBlock::iterator
            MII = I->begin(), MIE = I->end(); MII != MIE; ) {
       MachineInstr *MI = &*MII;
+      // We may be erasing MI below, increment MII now.
+      ++MII;
       LocalMIs.insert(MI);
 
       // If there exists an instruction which belongs to the following
@@ -490,28 +509,18 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
           MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
           MI->hasUnmodeledSideEffects()) {
         FoldAsLoadDefReg = 0;
-        ++MII;
         continue;
       }
       if (MI->mayStore() || MI->isCall())
         FoldAsLoadDefReg = 0;
 
-      if (MI->isBitcast()) {
-        if (optimizeBitcastInstr(MI, MBB)) {
-          // MI is deleted.
-          LocalMIs.erase(MI);
-          Changed = true;
-          MII = First ? I->begin() : llvm::next(PMII);
-          continue;
-        }
-      } else if (MI->isCompare()) {
-        if (optimizeCmpInstr(MI, MBB)) {
-          // MI is deleted.
-          LocalMIs.erase(MI);
-          Changed = true;
-          MII = First ? I->begin() : llvm::next(PMII);
-          continue;
-        }
+      if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
+          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
+          (MI->isSelect() && optimizeSelect(MI))) {
+        // MI is deleted.
+        LocalMIs.erase(MI);
+        Changed = true;
+        continue;
       }
 
       if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
@@ -542,14 +551,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
 
           // MI is replaced with FoldMI.
           Changed = true;
-          PMII = FoldMI;
-          MII = llvm::next(PMII);
           continue;
         }
       }
-      First = false;
-      PMII = MII;
-      ++MII;
     }
   }
 
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f0c50c1ed3cd..6820175c1bed 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         N0.getOpcode() == ISD::AND)
       if (ConstantSDNode *AndRHS =
                   dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
-        EVT ShiftTy = DCI.isBeforeLegalize() ?
+        EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
           getPointerTy() : getShiftAmountTy(N0.getValueType());
         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
           // Perform the xform if the AND RHS is a single bit.
@@ -2333,7 +2333,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
           const APInt &AndRHSC = AndRHS->getAPIntValue();
           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
             unsigned ShiftBits = AndRHSC.countTrailingZeros();
-            EVT ShiftTy = DCI.isBeforeLegalize() ?
+            EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
               getPointerTy() : getShiftAmountTy(N0.getValueType());
             EVT CmpTy = N0.getValueType();
             SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
@@ -2361,7 +2361,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         }
         NewC = NewC.lshr(ShiftBits);
         if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) {
-          EVT ShiftTy = DCI.isBeforeLegalize() ?
+          EVT ShiftTy = DCI.isBeforeLegalizeOps() ?
             getPointerTy() : getShiftAmountTy(N0.getValueType());
           EVT CmpTy = N0.getValueType();
           SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 7be6ef8cba93..61bc119d305b 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -461,6 +461,9 @@ namespace {
     /// allocateCodeSection - Allocate memory for a code section.
     uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                  unsigned SectionID) {
+      // Grow the required block size to account for the block header
+      Size += sizeof(*CurBlock);
+
       // FIXME: Alignement handling.
       FreeRangeHeader* candidateBlock = FreeMemoryList;
       FreeRangeHeader* head = FreeMemoryList;
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 2139df56205b..ed261a4194c9 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1770,23 +1770,41 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
   opStatus fs;
   assertArithmeticOK(*semantics);
 
+  // If the exponent is large enough, we know that this value is already
+  // integral, and the arithmetic below would potentially cause it to saturate
+  // to +/-Inf.  Bail out early instead.
+  if (exponent+1 >= (int)semanticsPrecision(*semantics))
+    return opOK;
+
   // The algorithm here is quite simple: we add 2^(p-1), where p is the
   // precision of our format, and then subtract it back off again.  The choice
   // of rounding modes for the addition/subtraction determines the rounding mode
   // for our integral rounding as well.
-  APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)),
-                        1 << (semanticsPrecision(*semantics)-1));
+  // NOTE: When the input value is negative, we do subtraction followed by
+  // addition instead.
+  APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
+  IntegerConstant <<= semanticsPrecision(*semantics)-1;
   APFloat MagicConstant(*semantics);
   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
                                       rmNearestTiesToEven);
+  MagicConstant.copySign(*this);
+
   if (fs != opOK)
     return fs;
 
+  // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
+  bool inputSign = isNegative();
+
   fs = add(MagicConstant, rounding_mode);
   if (fs != opOK && fs != opInexact)
     return fs;
 
   fs = subtract(MagicConstant, rounding_mode);
+
+  // Restore the input sign.
+  if (inputSign != isNegative())
+    changeSign();
+
   return fs;
 }
 
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index 99f8cd4cc370..d04f590f87ed 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -465,6 +465,118 @@ rety_open_create:
   return error_code::success();
 }
 
+error_code mapped_file_region::init(int fd, uint64_t offset) {
+  AutoFD FD(fd); // presumably closes fd when init() returns -- TODO confirm
+
+  // Figure out how large the file is; a Size of 0 means "use the file size".
+  struct stat FileInfo;
+  if (fstat(fd, &FileInfo) == -1)
+    return error_code(errno, system_category());
+  uint64_t FileSize = FileInfo.st_size;
+
+  if (Size == 0)
+    Size = FileSize; // NOTE(review): offset is not subtracted here -- confirm
+  else if (FileSize < Size) {
+    // We need to grow the file.
+    if (ftruncate(fd, Size) == -1)
+      return error_code(errno, system_category());
+  }
+
+  int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE;
+  int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE);
+#ifdef MAP_FILE
+  flags |= MAP_FILE;
+#endif
+  Mapping = ::mmap(0, Size, prot, flags, fd, offset);
+  if (Mapping == MAP_FAILED) // callers reset Mapping to null on error
+    return error_code(errno, system_category());
+  return error_code::success();
+}
+
+mapped_file_region::mapped_file_region(const Twine &path,
+                                       mapmode mode,
+                                       uint64_t length,
+                                       uint64_t offset,
+                                       error_code &ec)
+  : Mode(mode)
+  , Size(length)
+  , Mapping() {
+  // Make sure that the requested size fits within size_t; ec reports failure.
+  if (length > std::numeric_limits<size_t>::max()) {
+    ec = make_error_code(errc::invalid_argument);
+    return;
+  }
+
+  SmallString<128> path_storage;
+  StringRef name = path.toNullTerminatedStringRef(path_storage);
+  int oflags = (mode == readonly) ? O_RDONLY : O_RDWR;
+  int ofd = ::open(name.begin(), oflags);
+  if (ofd == -1) {
+    ec = error_code(errno, system_category());
+    return;
+  }
+
+  ec = init(ofd, offset);
+  if (ec)
+    Mapping = 0; // init() may have stored MAP_FAILED; make it test as null
+}
+
+mapped_file_region::mapped_file_region(int fd,
+                                       mapmode mode,
+                                       uint64_t length,
+                                       uint64_t offset,
+                                       error_code &ec)
+  : Mode(mode)
+  , Size(length)
+  , Mapping() {
+  // Make sure that the requested size fits within size_t; ec reports failure.
+  if (length > std::numeric_limits<size_t>::max()) {
+    ec = make_error_code(errc::invalid_argument);
+    return; // NOTE(review): fd is left open on this path -- confirm intended
+  }
+
+  ec = init(fd, offset);
+  if (ec)
+    Mapping = 0; // init() may have stored MAP_FAILED; make it test as null
+}
+
+mapped_file_region::~mapped_file_region() {
+  if (Mapping) // null after a failed construction or after being moved from
+    ::munmap(Mapping, Size);
+}
+
+#if LLVM_USE_RVALUE_REFERENCES
+mapped_file_region::mapped_file_region(mapped_file_region &&other)
+  : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) {
+  other.Mapping = 0; // so other's destructor will not munmap the region
+}
+#endif
+
+mapped_file_region::mapmode mapped_file_region::flags() const {
+  assert(Mapping && "Mapping failed but used anyway!");
+  return Mode; // the mapmode given to the constructor
+}
+
+uint64_t mapped_file_region::size() const {
+  assert(Mapping && "Mapping failed but used anyway!");
+  return Size; // requested length, or the file size if 0 was requested
+}
+
+char *mapped_file_region::data() const {
+  assert(Mapping && "Mapping failed but used anyway!");
+  assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
+  return reinterpret_cast<char*>(Mapping); // base address as mutable bytes
+}
+
+const char *mapped_file_region::const_data() const {
+  assert(Mapping && "Mapping failed but used anyway!");
+  return reinterpret_cast<const char*>(Mapping); // base address, read-only
+}
+
+int mapped_file_region::alignment() {
+  return Process::GetPageSize(); // presumably offset granularity -- confirm
+}
+
 error_code detail::directory_iterator_construct(detail::DirIterState &it,
                                                 StringRef path){
   SmallString<128> path_null(path);
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 66eeab058f62..696768ba9dd1 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -22,6 +22,8 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#undef max
+
 // MinGW doesn't define this.
 #ifndef _ERRNO_T_DEFINED
 #define _ERRNO_T_DEFINED
@@ -703,6 +705,203 @@ error_code get_magic(const Twine &path, uint32_t len,
   return error_code::success();
 }
 
+error_code mapped_file_region::init(int FD, uint64_t Offset) {
+  FileDescriptor = FD;
+  // Make sure that the requested size fits within SIZE_T.
+  if (Size > std::numeric_limits<SIZE_T>::max()) {
+    if (FileDescriptor) // 0 when the region was opened by path
+      _close(FileDescriptor);
+    else
+      ::CloseHandle(FileHandle);
+    return make_error_code(errc::invalid_argument);
+  }
+
+  DWORD flprotect; // page protection matching Mode
+  switch (Mode) {
+  case readonly:  flprotect = PAGE_READONLY; break;
+  case readwrite: flprotect = PAGE_READWRITE; break;
+  case priv:      flprotect = PAGE_WRITECOPY; break;
+  default: llvm_unreachable("invalid mapping mode");
+  }
+
+  FileMappingHandle = ::CreateFileMapping(FileHandle,
+                                          0,
+                                          flprotect,
+                                          Size >> 32, // high 32 bits of Size
+                                          Size & 0xffffffff, // low 32 bits
+                                          0);
+  if (FileMappingHandle == NULL) {
+    error_code ec = windows_error(GetLastError()); // capture before clean-up
+    if (FileDescriptor)
+      _close(FileDescriptor);
+    else
+      ::CloseHandle(FileHandle);
+    return ec;
+  }
+
+  DWORD dwDesiredAccess; // view access matching Mode
+  switch (Mode) {
+  case readonly:  dwDesiredAccess = FILE_MAP_READ; break;
+  case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break;
+  case priv:      dwDesiredAccess = FILE_MAP_COPY; break;
+  default: llvm_unreachable("invalid mapping mode");
+  }
+  Mapping = ::MapViewOfFile(FileMappingHandle,
+                            dwDesiredAccess,
+                            Offset >> 32, // high 32 bits of Offset
+                            Offset & 0xffffffff, // low 32 bits of Offset
+                            Size);
+  if (Mapping == NULL) {
+    error_code ec = windows_error(GetLastError()); // capture before clean-up
+    ::CloseHandle(FileMappingHandle);
+    if (FileDescriptor)
+      _close(FileDescriptor);
+    else
+      ::CloseHandle(FileHandle);
+    return ec;
+  }
+
+  if (Size == 0) { // no length requested: record the size of the mapped view
+    MEMORY_BASIC_INFORMATION mbi;
+    SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi));
+    if (Result == 0) {
+      error_code ec = windows_error(GetLastError());
+      ::UnmapViewOfFile(Mapping);
+      ::CloseHandle(FileMappingHandle);
+      if (FileDescriptor)
+        _close(FileDescriptor);
+      else
+        ::CloseHandle(FileHandle);
+      return ec;
+    }
+    Size = mbi.RegionSize;
+  }
+  return error_code::success();
+}
+
+mapped_file_region::mapped_file_region(const Twine &path,
+                                       mapmode mode,
+                                       uint64_t length,
+                                       uint64_t offset,
+                                       error_code &ec) 
+  : Mode(mode)
+  , Size(length)
+  , Mapping()
+  , FileDescriptor()
+  , FileHandle(INVALID_HANDLE_VALUE)
+  , FileMappingHandle() {
+  SmallString<128> path_storage;
+  SmallVector<wchar_t, 128> path_utf16;
+
+  // Convert path to UTF-16; a conversion error is reported through ec.
+  if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
+    return;
+
+  // Get file handle for creating a file mapping.
+  FileHandle = ::CreateFileW(c_str(path_utf16),
+                             Mode == readonly ? GENERIC_READ
+                                              : GENERIC_READ | GENERIC_WRITE,
+                             Mode == readonly ? FILE_SHARE_READ
+                                              : 0,
+                             0,
+                             Mode == readonly ? OPEN_EXISTING
+                                              : OPEN_ALWAYS,
+                             Mode == readonly ? FILE_ATTRIBUTE_READONLY
+                                              : FILE_ATTRIBUTE_NORMAL,
+                             0);
+  if (FileHandle == INVALID_HANDLE_VALUE) {
+    ec = windows_error(::GetLastError());
+    return;
+  }
+
+  FileDescriptor = 0; // opened by path: only FileHandle is owned
+  ec = init(FileDescriptor, offset);
+  if (ec) { // init() already released what it held; forget the stale values
+    Mapping = FileMappingHandle = 0;
+    FileHandle = INVALID_HANDLE_VALUE;
+    FileDescriptor = 0;
+  }
+}
+
+mapped_file_region::mapped_file_region(int fd,
+                                       mapmode mode,
+                                       uint64_t length,
+                                       uint64_t offset,
+                                       error_code &ec)
+  : Mode(mode)
+  , Size(length)
+  , Mapping()
+  , FileDescriptor(fd)
+  , FileHandle(INVALID_HANDLE_VALUE)
+  , FileMappingHandle() {
+  FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
+  if (FileHandle == INVALID_HANDLE_VALUE) {
+    _close(FileDescriptor);
+    FileDescriptor = 0; // keep the destructor from closing fd again
+    ec = make_error_code(errc::bad_file_descriptor);
+    return;
+  }
+
+  ec = init(FileDescriptor, offset);
+  if (ec) { // init() already released what it held; forget the stale values
+    Mapping = FileMappingHandle = 0;
+    FileHandle = INVALID_HANDLE_VALUE;
+    FileDescriptor = 0;
+  }
+}
+
+mapped_file_region::~mapped_file_region() {
+  if (Mapping) // release in reverse order of acquisition: view first
+    ::UnmapViewOfFile(Mapping);
+  if (FileMappingHandle)
+    ::CloseHandle(FileMappingHandle);
+  if (FileDescriptor) // nonzero: built from a descriptor, close via the CRT
+    _close(FileDescriptor);
+  else if (FileHandle != INVALID_HANDLE_VALUE)
+    ::CloseHandle(FileHandle);
+}
+
+#if LLVM_USE_RVALUE_REFERENCES
+mapped_file_region::mapped_file_region(mapped_file_region &&other)
+  : Mode(other.Mode)
+  , Size(other.Size)
+  , Mapping(other.Mapping)
+  , FileDescriptor(other.FileDescriptor)
+  , FileHandle(other.FileHandle)
+  , FileMappingHandle(other.FileMappingHandle) {
+  other.Mapping = other.FileMappingHandle = 0; // other's dtor becomes a no-op
+  other.FileHandle = INVALID_HANDLE_VALUE;
+  other.FileDescriptor = 0;
+}
+#endif
+
+mapped_file_region::mapmode mapped_file_region::flags() const {
+  assert(Mapping && "Mapping failed but used anyway!");
+  return Mode; // the mapmode given to the constructor
+}
+
+uint64_t mapped_file_region::size() const {
+  assert(Mapping && "Mapping failed but used anyway!");
+  return Size; // for a 0-length request this is the queried region size
+}
+
+char *mapped_file_region::data() const {
+  assert(Mode != readonly && "Cannot get non const data for readonly mapping!");
+  assert(Mapping && "Mapping failed but used anyway!");
+  return reinterpret_cast<char*>(Mapping); // base address as mutable bytes
+}
+
+const char *mapped_file_region::const_data() const {
+  assert(Mapping && "Mapping failed but used anyway!");
+  return reinterpret_cast<const char*>(Mapping); // base address, read-only
+}
+
+int mapped_file_region::alignment() {
+  SYSTEM_INFO SysInfo;
+  ::GetSystemInfo(&SysInfo);
+  return SysInfo.dwAllocationGranularity; // as reported by GetSystemInfo
+}
+
 error_code detail::directory_iterator_construct(detail::DirIterState &it,
                                                 StringRef path){
   SmallVector<wchar_t, 128> path_utf16;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 8536b94d716f..e9e2803ad579 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -532,7 +532,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
     // This modifier is not yet supported.
     case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
       return true;
-    case 'H': // The highest-numbered register of a pair.
+    case 'H': { // The highest-numbered register of a pair.
       const MachineOperand &MO = MI->getOperand(OpNum);
       if (!MO.isReg())
         return true;
@@ -547,6 +547,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       O << ARMInstPrinter::getRegisterName(Reg);
       return false;
     }
+    }
   }
 
   printOperand(MI, OpNum, O);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 057fd718fdb5..1cc5a17cb029 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1568,6 +1568,136 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
 }
 
+/// If the single-use vreg Reg is defined by an instruction that can be folded
+/// into a MOVCC, set MI to that def and return the predicated opcode, else 0.
+static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI,
+                                 const MachineRegisterInfo &MRI) {
+  if (!TargetRegisterInfo::isVirtualRegister(Reg))
+    return 0;
+  if (!MRI.hasOneNonDBGUse(Reg))
+    return 0;
+  MI = MRI.getVRegDef(Reg);
+  if (!MI)
+    return 0;
+  // Starting after operand 0, reject MI if it has any non-dead defs or physreg
+  // operands. This also detects predicated instructions which read CPSR.
+  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+      return 0;
+    if (MO.isDef() && !MO.isDead())
+      return 0;
+  }
+  switch (MI->getOpcode()) {
+  default: return 0; // not one of the AND/EOR/ORR/ADD/SUB forms listed below
+  case ARM::ANDri:   return ARM::ANDCCri;
+  case ARM::ANDrr:   return ARM::ANDCCrr;
+  case ARM::ANDrsi:  return ARM::ANDCCrsi;
+  case ARM::ANDrsr:  return ARM::ANDCCrsr;
+  case ARM::t2ANDri: return ARM::t2ANDCCri;
+  case ARM::t2ANDrr: return ARM::t2ANDCCrr;
+  case ARM::t2ANDrs: return ARM::t2ANDCCrs;
+  case ARM::EORri:   return ARM::EORCCri;
+  case ARM::EORrr:   return ARM::EORCCrr;
+  case ARM::EORrsi:  return ARM::EORCCrsi;
+  case ARM::EORrsr:  return ARM::EORCCrsr;
+  case ARM::t2EORri: return ARM::t2EORCCri;
+  case ARM::t2EORrr: return ARM::t2EORCCrr;
+  case ARM::t2EORrs: return ARM::t2EORCCrs;
+  case ARM::ORRri:   return ARM::ORRCCri;
+  case ARM::ORRrr:   return ARM::ORRCCrr;
+  case ARM::ORRrsi:  return ARM::ORRCCrsi;
+  case ARM::ORRrsr:  return ARM::ORRCCrsr;
+  case ARM::t2ORRri: return ARM::t2ORRCCri;
+  case ARM::t2ORRrr: return ARM::t2ORRCCrr;
+  case ARM::t2ORRrs: return ARM::t2ORRCCrs;
+
+  // ARM ADD/SUB
+  case ARM::ADDri:   return ARM::ADDCCri;
+  case ARM::ADDrr:   return ARM::ADDCCrr;
+  case ARM::ADDrsi:  return ARM::ADDCCrsi;
+  case ARM::ADDrsr:  return ARM::ADDCCrsr;
+  case ARM::SUBri:   return ARM::SUBCCri;
+  case ARM::SUBrr:   return ARM::SUBCCrr;
+  case ARM::SUBrsi:  return ARM::SUBCCrsi;
+  case ARM::SUBrsr:  return ARM::SUBCCrsr;
+
+  // Thumb2 ADD/SUB
+  case ARM::t2ADDri:   return ARM::t2ADDCCri;
+  case ARM::t2ADDri12: return ARM::t2ADDCCri12;
+  case ARM::t2ADDrr:   return ARM::t2ADDCCrr;
+  case ARM::t2ADDrs:   return ARM::t2ADDCCrs;
+  case ARM::t2SUBri:   return ARM::t2SUBCCri;
+  case ARM::t2SUBri12: return ARM::t2SUBCCri12;
+  case ARM::t2SUBrr:   return ARM::t2SUBCCrr;
+  case ARM::t2SUBrs:   return ARM::t2SUBCCrs;
+  }
+}
+
+bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     unsigned &TrueOp, unsigned &FalseOp,
+                                     bool &Optimizable) const {
+  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+         "Unknown select instruction");
+  // MOVCC operands:
+  // 0: Def.
+  // 1: True use.
+  // 2: False use.
+  // 3: Condition code.
+  // 4: CPSR use.
+  TrueOp = 1; // NOTE(review): optimizeSelect uses op 1 as Rfalse -- confirm
+  FalseOp = 2;
+  Cond.push_back(MI->getOperand(3)); // Cond = { condition code, CPSR use }
+  Cond.push_back(MI->getOperand(4));
+  // We can always fold a def.
+  Optimizable = true;
+  return false; // false: the select was analyzed successfully
+}
+
+MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
+                                               bool PreferFalse) const {
+  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
+         "Unknown select instruction");
+  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MachineInstr *DefMI = 0;
+  unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI);
+  bool Invert = !Opc; // operand 2 not foldable: fall back to operand 1
+  if (!Opc)
+    Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI);
+  if (!Opc)
+    return 0; // nothing foldable (PreferFalse is never consulted here)
+
+  // Create a new predicated version of DefMI, inserted before MI.
+  // Rfalse is the first use: the MOVCC input that was not folded.
+  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                      get(Opc), MI->getOperand(0).getReg())
+    .addOperand(MI->getOperand(Invert ? 2 : 1));
+
+  // Copy all the DefMI operands, excluding its (null) predicate.
+  const MCInstrDesc &DefDesc = DefMI->getDesc();
+  for (unsigned i = 1, e = DefDesc.getNumOperands();
+       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
+    NewMI.addOperand(DefMI->getOperand(i));
+
+  unsigned CondCode = MI->getOperand(3).getImm();
+  if (Invert) // folded the operand-1 def, so the condition is flipped
+    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
+  else
+    NewMI.addImm(CondCode);
+  NewMI.addOperand(MI->getOperand(4)); // the CPSR use from the MOVCC
+
+  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
+  if (NewMI->hasOptionalDef())
+    AddDefaultCC(NewMI);
+
+  // The caller will erase MI, but not DefMI.
+  DefMI->eraseFromParent();
+  return NewMI;
+}
+
 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
 /// def operand.
@@ -3224,11 +3354,18 @@ enum ARMExeDomain {
 //
 std::pair<uint16_t, uint16_t>
 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
-  // VMOVD is a VFP instruction, but can be changed to NEON if it isn't
-  // predicated.
+  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+  // if they are not predicated.
   if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
 
+  // Cortex-A9 is particularly picky about mixing the two and wants these
+  // converted.
+  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+      (MI->getOpcode() == ARM::VMOVRS ||
+       MI->getOpcode() == ARM::VMOVSR))
+    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
+
   // No other instructions can be swizzled, so just determine their domain.
   unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
 
@@ -3248,22 +3385,97 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
 
 void
 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
-  // We only know how to change VMOVD into VORR.
-  assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD");
-  if (Domain != ExeNEON)
-    return;
+  unsigned DstReg, SrcReg, DReg; // scratch for the VMOVRS/VMOVSR cases
+  unsigned Lane;
+  MachineInstrBuilder MIB(MI);
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  bool isKill;
+  switch (MI->getOpcode()) {
+    default:
+      llvm_unreachable("cannot handle opcode!"); // only the moves below
+      break;
+    case ARM::VMOVD:
+      if (Domain != ExeNEON) // nothing is rewritten for other domains
+        break;
+
+      // Zap the predicate operands.
+      assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+      MI->RemoveOperand(3);
+      MI->RemoveOperand(2);
+
+      // Change to a VORRd which requires two identical use operands.
+      MI->setDesc(get(ARM::VORRd));
+
+      // Add the extra source operand and new predicates.
+      // This will go before any implicit ops.
+      AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
+      break;
+    case ARM::VMOVRS:
+      if (Domain != ExeNEON) // nothing is rewritten for other domains
+        break;
+      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
+
+      DstReg = MI->getOperand(0).getReg();
+      SrcReg = MI->getOperand(1).getReg();
+
+      DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass);
+      Lane = 0; // SrcReg is ssub_0 of DReg if the lookup above succeeded
+      if (DReg == ARM::NoRegister) {
+        DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass);
+        Lane = 1; // otherwise it is expected to be the ssub_1 half
+        assert(DReg && "S-register with no D super-register?");
+      }
+
+      MI->RemoveOperand(3);
+      MI->RemoveOperand(2);
+      MI->RemoveOperand(1); // operands 3..1 removed; operand 0 is kept
 
-  // Zap the predicate operands.
-  assert(!isPredicated(MI) && "Cannot predicate a VORRd");
-  MI->RemoveOperand(3);
-  MI->RemoveOperand(2);
+      MI->setDesc(get(ARM::VGETLNi32));
+      MIB.addReg(DReg);
+      MIB.addImm(Lane);
 
-  // Change to a VORRd which requires two identical use operands.
-  MI->setDesc(get(ARM::VORRd));
+      MIB->getOperand(1).setIsUndef(); // NOTE(review): DReg use undef -- confirm
+      MIB.addReg(SrcReg, RegState::Implicit);
+
+      AddDefaultPred(MIB);
+      break;
+    case ARM::VMOVSR:
+      if (Domain != ExeNEON) // nothing is rewritten for other domains
+        break;
+      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
+
+      DstReg = MI->getOperand(0).getReg();
+      SrcReg = MI->getOperand(1).getReg();
+      DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass);
+      Lane = 0; // DstReg is ssub_0 of DReg if the lookup above succeeded
+      if (DReg == ARM::NoRegister) {
+        DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass);
+        Lane = 1; // otherwise it is expected to be the ssub_1 half
+        assert(DReg && "S-register with no D super-register?");
+      }
+      isKill = MI->getOperand(0).isKill();
+
+      MI->RemoveOperand(3);
+      MI->RemoveOperand(2);
+      MI->RemoveOperand(1);
+      MI->RemoveOperand(0); // operands 3..0 removed and re-added below
+
+      MI->setDesc(get(ARM::VSETLNi32));
+      MIB.addReg(DReg);
+      MIB.addReg(DReg);
+      MIB.addReg(SrcReg);
+      MIB.addImm(Lane);
+
+      MIB->getOperand(1).setIsUndef(); // NOTE(review): DReg use undef -- confirm
+
+      if (isKill)
+        MIB->addRegisterKilled(DstReg, TRI, true);
+      MIB->addRegisterDefined(DstReg, TRI);
+
+      AddDefaultPred(MIB);
+      break;
+  }
 
-  // Add the extra source operand and new predicates.
-  // This will go before any implicit ops.
-  AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
 }
 
 bool ARMBaseInstrInfo::hasNOP() const {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1a10a4ab1c52..92e5ee8dcbd3 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -202,6 +202,13 @@ public:
                                     unsigned SrcReg2, int CmpMask, int CmpValue,
                                     const MachineRegisterInfo *MRI) const;
 
+  virtual bool analyzeSelect(const MachineInstr *MI,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             unsigned &TrueOp, unsigned &FalseOp,
+                             bool &Optimizable) const;
+
+  virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const;
+
   /// FoldImmediate - 'Reg' is known to be defined by a move immediate
   /// instruction, try to fold the immediate into the use instruction.
   virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
@@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
 
 int getMatchingCondBranchOpcode(int Opc);
 
+/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
+/// opcode of the SSA instruction representing the conditional MI.
+unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
+                                  MachineInstr *&MI,
+                                  const MachineRegisterInfo &MRI);
 
 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
 /// the instruction is encoded with an 'S' bit is determined by the optional
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 57f811603945..bf9d16eea181 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1821,9 +1821,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   default:
     llvm_unreachable("Unsupported calling convention");
   case CallingConv::Fast:
-    // Ignore fastcc. Silence compiler warnings.
-    (void)RetFastCC_ARM_APCS;
-    (void)FastCC_ARM_APCS;
+    if (Subtarget->hasVFP2() && !isVarArg) {
+      if (!Subtarget->isAAPCS_ABI())
+        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+      // For AAPCS ABI targets, just use VFP variant of the calling convention.
+      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+    }
     // Fallthrough
   case CallingConv::C:
     // Use target triple & subtarget features to do actual dispatch.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index ee349a753fef..a3a6c3176bea 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2385,8 +2385,10 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
       case ARMISD::COR:  Opc = ARM::t2ORRCCrs; break;
       case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
       }
-      SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
-      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+      SDValue Ops[] = {
+        FalseVal, FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag
+      };
+      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
     }
 
     ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
@@ -2401,8 +2403,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
         case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
         }
         SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
-        SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
-        return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+        SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag };
+        return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
       }
     }
 
@@ -2413,8 +2415,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
     case ARMISD::COR:  Opc = ARM::t2ORRCCrr; break;
     case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
     }
-    SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
-    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+    SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
   }
 
   SDValue CPTmp0;
@@ -2428,8 +2430,10 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
     case ARMISD::COR:  Opc = ARM::ORRCCrsi; break;
     case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
     }
-    SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
-    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+    SDValue Ops[] = {
+      FalseVal, FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag
+    };
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
   }
 
   if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
@@ -2440,8 +2444,10 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
     case ARMISD::COR:  Opc = ARM::ORRCCrsr; break;
     case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
     }
-    SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
-    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
+    SDValue Ops[] = {
+      FalseVal, FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag
+    };
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 9);
   }
 
   ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
@@ -2456,8 +2462,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
       case ARMISD::CXOR: Opc = ARM::EORCCri; break;
       }
       SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
-      SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
-      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+      SDValue Ops[] = { FalseVal, FalseVal, True, CC, CCR, Reg0, InFlag };
+      return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
     }
   }
 
@@ -2468,8 +2474,8 @@ SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
   case ARMISD::COR:  Opc = ARM::ORRCCrr; break;
   case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
   }
-  SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
-  return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+  SDValue Ops[] = { FalseVal, FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+  return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
 }
 
 /// Target-specific DAG combining for ISD::XOR.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c66618a8ef5f..190ca076dae5 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -6973,6 +6973,27 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
 //                           ARM Optimization Hooks
 //===----------------------------------------------------------------------===//
 
+// Returns true iff N is a ConstantSDNode that is all ones (AllOnes set) or
+// null (AllOnes clear). Any node that is not a ConstantSDNode yields false.
+static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
+  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+    return AllOnes ? Const->isAllOnesValue() : Const->isNullValue();
+  return false;
+}
+
+// Combine a constant select operand into its use:
+//
+//   (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
+//   (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
+//
+// The transform is rejected if the select doesn't have a constant operand that
+// is null.
+//
+// @param N       The node to transform.
+// @param Slct    The N operand that is a select.
+// @param OtherOp The other N operand (x above).
+// @param DCI     Context.
+// @returns The new node, or SDValue() on failure.
 static
 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                             TargetLowering::DAGCombinerInfo &DCI) {
@@ -6998,16 +7019,12 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
   assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
           "Bad input!");
 
-  if (LHS.getOpcode() == ISD::Constant &&
-      cast<ConstantSDNode>(LHS)->isNullValue()) {
+  if (isZeroOrAllOnes(LHS, false)) {
     DoXform = true;
-  } else if (CC != ISD::SETCC_INVALID &&
-             RHS.getOpcode() == ISD::Constant &&
-             cast<ConstantSDNode>(RHS)->isNullValue()) {
+  } else if (CC != ISD::SETCC_INVALID && isZeroOrAllOnes(RHS, false)) {
     std::swap(LHS, RHS);
     SDValue Op0 = Slct.getOperand(0);
-    EVT OpVT = isSlctCC ? Op0.getValueType() :
-                          Op0.getOperand(0).getValueType();
+    EVT OpVT = isSlctCC ? Op0.getValueType() : Op0.getOperand(0).getValueType();
     bool isInt = OpVT.isInteger();
     CC = ISD::getSetCCInverse(CC, isInt);
 
@@ -7018,19 +7035,19 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
     InvCC = true;
   }
 
-  if (DoXform) {
-    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
-    if (isSlctCC)
-      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
-                             Slct.getOperand(0), Slct.getOperand(1), CC);
-    SDValue CCOp = Slct.getOperand(0);
-    if (InvCC)
-      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
-                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
-    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
-                       CCOp, OtherOp, Result);
-  }
-  return SDValue();
+  if (!DoXform)
+    return SDValue();
+
+  SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+  if (isSlctCC)
+    return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+                           Slct.getOperand(0), Slct.getOperand(1), CC);
+  SDValue CCOp = Slct.getOperand(0);
+  if (InvCC)
+    CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+                        CCOp.getOperand(0), CCOp.getOperand(1), CC);
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+                     CCOp, OtherOp, Result);
 }
 
 // AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
@@ -7297,16 +7314,8 @@ static SDValue PerformMULCombine(SDNode *N,
 }
 
 static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
-  if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
-    return false;
-
-  SDValue FalseVal = N.getOperand(0);
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
-  if (!C)
-    return false;
-  if (AllOnes)
-    return C->isAllOnesValue();
-  return C->isNullValue();
+  bool OneUseCMOV = N.getOpcode() == ARMISD::CMOV && N.getNode()->hasOneUse();
+  return OneUseCMOV && isZeroOrAllOnes(N.getOperand(0), AllOnes);
 }
 
 /// formConditionalOp - Combine an operation with a conditional move operand
@@ -8808,6 +8817,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
   case MVT::i16:
   case MVT::i32:
     return true;
+  case MVT::f64:
+    return Subtarget->hasNEON();
   // FIXME: VLD1 etc with standard alignment is legal.
   }
 }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 6340a58f1a0f..992aba5803f6 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -242,6 +242,9 @@ def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion =="
 def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "
                                  "Subtarget->isTargetDarwin()">;
 
+def IsLE             : Predicate<"TLI.isLittleEndian()">;
+def IsBE             : Predicate<"TLI.isBigEndian()">;
+
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
 
@@ -3936,7 +3939,7 @@ def BCCZi64 : PseudoInst<(outs),
 // a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {
 
-let isCommutable = 1 in
+let isCommutable = 1, isSelect = 1 in
 def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
                            4, IIC_iCMOVr,
   [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
@@ -3989,25 +3992,29 @@ multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
                           InstrItinClass iii, InstrItinClass iir,
                           InstrItinClass iis> {
   def ri  : ARMPseudoExpand<(outs GPR:$Rd),
-                            (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+                            (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm,
+                                 pred:$p, cc_out:$s),
                             4, iii, [],
                        (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
-                            RegConstraint<"$Rn = $Rd">;
+                            RegConstraint<"$Rfalse = $Rd">;
   def rr  : ARMPseudoExpand<(outs GPR:$Rd),
-                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm,
+                                 pred:$p, cc_out:$s),
                             4, iir, [],
                            (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
-                            RegConstraint<"$Rn = $Rd">;
+                            RegConstraint<"$Rfalse = $Rd">;
   def rsi : ARMPseudoExpand<(outs GPR:$Rd),
-                           (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+                            (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift,
+                                 pred:$p, cc_out:$s),
                             4, iis, [],
                 (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
-                            RegConstraint<"$Rn = $Rd">;
+                            RegConstraint<"$Rfalse = $Rd">;
   def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
-                       (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+                           (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift,
+                                pred:$p, cc_out:$s),
                             4, iis, [],
                 (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
-                            RegConstraint<"$Rn = $Rd">;
+                            RegConstraint<"$Rfalse = $Rd">;
 }
 
 defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
@@ -4016,6 +4023,10 @@ defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
                             IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
 defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
                             IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr,
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr,
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
 
 } // neverHasSideEffects
 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 31340881920d..048d340df006 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> {
   let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
 }
 
+def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() == 2;
+}]>; // Load whose getAlignment() is exactly 2.
+def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                 (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() == 2;
+}]>; // Store whose getAlignment() is exactly 2.
+def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() == 1;
+}]>; // Load whose getAlignment() is exactly 1.
+def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                             (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() == 1;
+}]>; // Store whose getAlignment() is exactly 1.
+def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>; // Load whose getAlignment() is below 4.
+def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
+                                    (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>; // Store whose getAlignment() is below 4.
 
 //===----------------------------------------------------------------------===//
 // NEON-specific DAG Nodes.
@@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
 
 } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
 
+// Use vld1/vst1 for unaligned f64 load / store
+def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
+          (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
+          (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
+          (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>;
+def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
+          (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>;
+def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
+          (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>;
+def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
+          (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
 
 //===----------------------------------------------------------------------===//
 // NEON pattern fragments
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 307006f413a8..8ecf0091d8b6 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -757,6 +757,33 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
      let Inst{24} = 1;
      let Inst{23-21} = op23_21;
    }
+
+   // Predicated versions.
+   def CCri : t2PseudoExpand<(outs GPRnopc:$Rd),
+                             (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm,
+                                  pred:$p, cc_out:$s), 4, IIC_iALUi, [],
+                             (!cast<Instruction>(NAME#ri) GPRnopc:$Rd,
+                              GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+              RegConstraint<"$Rfalse = $Rd">;
+   def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd),
+                             (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm,
+                                  pred:$p),
+                             4, IIC_iALUi, [],
+                             (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd,
+                              GPR:$Rn, imm0_4095:$imm, pred:$p)>,
+                RegConstraint<"$Rfalse = $Rd">;
+   def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd),
+                             (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm,
+                                  pred:$p, cc_out:$s), 4, IIC_iALUr, [],
+                             (!cast<Instruction>(NAME#rr) GPRnopc:$Rd,
+                              GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+              RegConstraint<"$Rfalse = $Rd">;
+   def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd),
+                             (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm,
+                                  pred:$p, cc_out:$s), 4, IIC_iALUsi, [],
+                             (!cast<Instruction>(NAME#rs) GPRnopc:$Rd,
+                              GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>,
+              RegConstraint<"$Rfalse = $Rd">;
 }
 
 /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
@@ -2938,7 +2965,7 @@ defm t2TEQ  : T2I_cmp_irs<0b0100, "teq",
 // a two-value operand where a dag node expects two operands. :(
 let neverHasSideEffects = 1 in {
 
-let isCommutable = 1 in
+let isCommutable = 1, isSelect = 1 in
 def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
                             (ins rGPR:$false, rGPR:$Rm, pred:$p),
                             4, IIC_iCMOVr,
@@ -3026,22 +3053,25 @@ multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
                    InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
    // shifted imm
    def ri : t2PseudoExpand<(outs rGPR:$Rd),
-                           (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+                           (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm,
+                                pred:$p, cc_out:$s),
                            4, iii, [],
                   (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
-                           RegConstraint<"$Rn = $Rd">;
+                           RegConstraint<"$Rfalse = $Rd">;
    // register
    def rr : t2PseudoExpand<(outs rGPR:$Rd),
-                           (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+                           (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm,
+                                pred:$p, cc_out:$s),
                            4, iir, [],
                         (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
-                           RegConstraint<"$Rn = $Rd">;
+                           RegConstraint<"$Rfalse = $Rd">;
    // shifted register
    def rs : t2PseudoExpand<(outs rGPR:$Rd),
-                       (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+                           (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm,
+                                pred:$p, cc_out:$s),
                            4, iis, [],
             (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
-                           RegConstraint<"$Rn = $Rd">;
+                           RegConstraint<"$Rfalse = $Rd">;
 } // T2I_bincc_irs
 
 defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 23c132e4f6a8..eb7eaa6c9708 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>,
   let ParserMatchClass = FPImmOperand;
 }
 
+def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>; // Load whose getAlignment() is at least 4.
+
+def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
+                             (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>; // Store whose getAlignment() is at least 4.
+
 // The VCVT to/from fixed-point instructions encode the 'fbits' operand
 // (the number of fixed bits) differently than it appears in the assembly
 // source. It's encoded as "Size - fbits" where Size is the size of the
@@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
                  IIC_fpLoad64, "vldr", "\t$Dd, $addr",
-                 [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
+                 [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>;
 
 def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
                  IIC_fpLoad32, "vldr", "\t$Sd, $addr",
@@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
 
 def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
                  IIC_fpStore64, "vstr", "\t$Dd, $addr",
-                 [(store (f64 DPR:$Dd), addrmode5:$addr)]>;
+                 [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>;
 
 def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
                  IIC_fpStore32, "vstr", "\t$Sd, $addr",
@@ -433,25 +442,25 @@ def VCVTSD  : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
 // Between half-precision and single-precision.  For disassembly only.
 
 // FIXME: Verify encoding after integrated assembler is working.
-def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
                  [/* For disassembly only; pattern left blank */]>;
 
-def : ARMPat<(f32_to_f16 SPR:$a),
-             (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-
-def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
                  [/* For disassembly only; pattern left blank */]>;
 
+def : ARMPat<(f32_to_f16 SPR:$a),
+             (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
 def : ARMPat<(f16_to_f32 GPR:$a),
              (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
                  [/* For disassembly only; pattern left blank */]>;
 
-def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
                  [/* For disassembly only; pattern left blank */]>;
 
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e47bf66e3afe..c90751d0b962 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,10 +18,12 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include <vector>
@@ -383,7 +385,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
 static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
                                 uint64_t Address, const void *Decoder);
 #include "ARMGenDisassemblerTables.inc"
-#include "ARMGenInstrInfo.inc"
 #include "ARMGenEDInfo.inc"
 
 static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
@@ -427,7 +428,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                   (bytes[0] <<  0);
 
   // Calling the auto-generated decoder function.
-  DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI);
+  DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn,
+                                          Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     return result;
@@ -436,14 +438,15 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   // VFP and NEON instructions, similarly, are shared between ARM
   // and Thumb modes.
   MI.clear();
-  result = decodeVFPInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     return result;
   }
 
   MI.clear();
-  result = decodeNEONDataInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     // Add a fake predicate operand, because we share these instruction
@@ -454,7 +457,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   MI.clear();
-  result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     // Add a fake predicate operand, because we share these instruction
@@ -465,7 +469,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   MI.clear();
-  result = decodeNEONDupInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     // Add a fake predicate operand, because we share these instruction
@@ -765,7 +770,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   uint16_t insn16 = (bytes[1] << 8) | bytes[0];
-  DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI);
+  DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16,
+                                          Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 2;
     Check(result, AddThumbPredicate(MI));
@@ -773,7 +779,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   MI.clear();
-  result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16,
+                             Address, this, STI);
   if (result) {
     Size = 2;
     bool InITBlock = ITBlock.instrInITBlock();
@@ -783,7 +790,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   MI.clear();
-  result = decodeThumb2Instruction16(MI, insn16, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumb216, MI, insn16,
+                             Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 2;
 
@@ -818,7 +826,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                     (bytes[1] << 24) |
                     (bytes[0] << 16);
   MI.clear();
-  result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     bool InITBlock = ITBlock.instrInITBlock();
@@ -828,7 +837,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   MI.clear();
-  result = decodeThumb2Instruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     Check(result, AddThumbPredicate(MI));
@@ -836,7 +846,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   MI.clear();
-  result = decodeVFPInstruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     UpdateThumbVFPPredicate(MI);
@@ -844,19 +854,21 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }
 
   MI.clear();
-  result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     Check(result, AddThumbPredicate(MI));
     return result;
   }
 
-  if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) {
+  if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
     MI.clear();
     uint32_t NEONLdStInsn = insn32;
     NEONLdStInsn &= 0xF0FFFFFF;
     NEONLdStInsn |= 0x04000000;
-    result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI);
+    result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+                               Address, this, STI);
     if (result != MCDisassembler::Fail) {
       Size = 4;
       Check(result, AddThumbPredicate(MI));
@@ -864,13 +876,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }
 
-  if (fieldFromInstruction32(insn32, 24, 4) == 0xF) {
+  if (fieldFromInstruction(insn32, 24, 4) == 0xF) {
     MI.clear();
     uint32_t NEONDataInsn = insn32;
     NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
     NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
     NEONDataInsn |= 0x12000000; // Set bits 28 and 25
-    result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI);
+    result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+                               Address, this, STI);
     if (result != MCDisassembler::Fail) {
       Size = 4;
       Check(result, AddThumbPredicate(MI));
@@ -1117,9 +1130,9 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned type = fieldFromInstruction32(Val, 5, 2);
-  unsigned imm = fieldFromInstruction32(Val, 7, 5);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned type = fieldFromInstruction(Val, 5, 2);
+  unsigned imm = fieldFromInstruction(Val, 7, 5);
 
   // Register-immediate
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1154,9 +1167,9 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned type = fieldFromInstruction32(Val, 5, 2);
-  unsigned Rs = fieldFromInstruction32(Val, 8, 4);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned type = fieldFromInstruction(Val, 5, 2);
+  unsigned Rs = fieldFromInstruction(Val, 8, 4);
 
   // Register-register
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1224,8 +1237,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Vd = fieldFromInstruction32(Val, 8, 5);
-  unsigned regs = fieldFromInstruction32(Val, 0, 8);
+  unsigned Vd = fieldFromInstruction(Val, 8, 5);
+  unsigned regs = fieldFromInstruction(Val, 0, 8);
 
   if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -1241,8 +1254,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Vd = fieldFromInstruction32(Val, 8, 5);
-  unsigned regs = fieldFromInstruction32(Val, 0, 8);
+  unsigned Vd = fieldFromInstruction(Val, 8, 5);
+  unsigned regs = fieldFromInstruction(Val, 0, 8);
 
   regs = regs >> 1;
 
@@ -1263,8 +1276,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
   // the mask of all bits LSB-and-lower, and then xor them to create
   // the mask of that's all ones on [msb, lsb].  Finally we not it to
   // create the final mask.
-  unsigned msb = fieldFromInstruction32(Val, 5, 5);
-  unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+  unsigned msb = fieldFromInstruction(Val, 5, 5);
+  unsigned lsb = fieldFromInstruction(Val, 0, 5);
 
   DecodeStatus S = MCDisassembler::Success;
   if (lsb > msb) Check(S, MCDisassembler::SoftFail);
@@ -1281,12 +1294,12 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 8);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned U = fieldFromInstruction32(Insn, 23, 1);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned CRd = fieldFromInstruction(Insn, 12, 4);
+  unsigned coproc = fieldFromInstruction(Insn, 8, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 8);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned U = fieldFromInstruction(Insn, 23, 1);
 
   switch (Inst.getOpcode()) {
     case ARM::LDC_OFFSET:
@@ -1426,14 +1439,14 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
                               uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned reg = fieldFromInstruction32(Insn, 25, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned reg = fieldFromInstruction(Insn, 25, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
 
   // On stores, the writeback operand precedes Rt.
   switch (Inst.getOpcode()) {
@@ -1476,7 +1489,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
     return MCDisassembler::Fail;
 
   ARM_AM::AddrOpc Op = ARM_AM::add;
-  if (!fieldFromInstruction32(Insn, 23, 1))
+  if (!fieldFromInstruction(Insn, 23, 1))
     Op = ARM_AM::sub;
 
   bool writeback = (P == 0) || (W == 1);
@@ -1493,7 +1506,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
     if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
       return MCDisassembler::Fail;
     ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
-    switch( fieldFromInstruction32(Insn, 5, 2)) {
+    switch( fieldFromInstruction(Insn, 5, 2)) {
       case 0:
         Opc = ARM_AM::lsl;
         break;
@@ -1509,7 +1522,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
       default:
         return MCDisassembler::Fail;
     }
-    unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+    unsigned amt = fieldFromInstruction(Insn, 7, 5);
     unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);
 
     Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1529,11 +1542,11 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
-  unsigned Rm = fieldFromInstruction32(Val,  0, 4);
-  unsigned type = fieldFromInstruction32(Val, 5, 2);
-  unsigned imm = fieldFromInstruction32(Val, 7, 5);
-  unsigned U = fieldFromInstruction32(Val, 12, 1);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
+  unsigned Rm = fieldFromInstruction(Val,  0, 4);
+  unsigned type = fieldFromInstruction(Val, 5, 2);
+  unsigned imm = fieldFromInstruction(Val, 7, 5);
+  unsigned U = fieldFromInstruction(Val, 12, 1);
 
   ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
   switch (type) {
@@ -1570,15 +1583,15 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned type = fieldFromInstruction32(Insn, 22, 1);
-  unsigned imm = fieldFromInstruction32(Insn, 8, 4);
-  unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned type = fieldFromInstruction(Insn, 22, 1);
+  unsigned imm = fieldFromInstruction(Insn, 8, 4);
+  unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
   unsigned Rt2 = Rt + 1;
 
   bool writeback = (W == 1) | (P == 0);
@@ -1609,7 +1622,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
         S = MCDisassembler::SoftFail;
       if (Rt2 == 15)
         S = MCDisassembler::SoftFail;
-      if (!type && fieldFromInstruction32(Insn, 8, 4))
+      if (!type && fieldFromInstruction(Insn, 8, 4))
         S = MCDisassembler::SoftFail;
       break;
     case ARM::STRH:
@@ -1761,8 +1774,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned mode = fieldFromInstruction(Insn, 23, 2);
 
   switch (mode) {
     case 0:
@@ -1791,9 +1804,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned reglist = fieldFromInstruction(Insn, 0, 16);
 
   if (pred == 0xF) {
     switch (Inst.getOpcode()) {
@@ -1850,9 +1863,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
     }
 
     // For stores (which become SRS's, the only operand is the mode.
-    if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+    if (fieldFromInstruction(Insn, 20, 1) == 0) {
       Inst.addOperand(
-          MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+          MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
       return S;
     }
 
@@ -1873,10 +1886,10 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
 
 static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction32(Insn, 18, 2);
-  unsigned M = fieldFromInstruction32(Insn, 17, 1);
-  unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
-  unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+  unsigned imod = fieldFromInstruction(Insn, 18, 2);
+  unsigned M = fieldFromInstruction(Insn, 17, 1);
+  unsigned iflags = fieldFromInstruction(Insn, 6, 3);
+  unsigned mode = fieldFromInstruction(Insn, 0, 5);
 
   DecodeStatus S = MCDisassembler::Success;
 
@@ -1913,10 +1926,10 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction32(Insn, 9, 2);
-  unsigned M = fieldFromInstruction32(Insn, 8, 1);
-  unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
-  unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+  unsigned imod = fieldFromInstruction(Insn, 9, 2);
+  unsigned M = fieldFromInstruction(Insn, 8, 1);
+  unsigned iflags = fieldFromInstruction(Insn, 5, 3);
+  unsigned mode = fieldFromInstruction(Insn, 0, 5);
 
   DecodeStatus S = MCDisassembler::Success;
 
@@ -1955,13 +1968,13 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 8, 4);
   unsigned imm = 0;
 
-  imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
-  imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
-  imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
-  imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+  imm |= (fieldFromInstruction(Insn, 0, 8) << 0);
+  imm |= (fieldFromInstruction(Insn, 12, 3) << 8);
+  imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+  imm |= (fieldFromInstruction(Insn, 26, 1) << 11);
 
   if (Inst.getOpcode() == ARM::t2MOVTi16)
     if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -1979,12 +1992,12 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   unsigned imm = 0;
 
-  imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
-  imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+  imm |= (fieldFromInstruction(Insn, 0, 12) << 0);
+  imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
 
   if (Inst.getOpcode() == ARM::MOVTi16)
     if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2005,11 +2018,11 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
-  unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 8, 4);
+  unsigned Ra = fieldFromInstruction(Insn, 12, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
   if (pred == 0xF)
     return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -2033,9 +2046,9 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned add = fieldFromInstruction32(Val, 12, 1);
-  unsigned imm = fieldFromInstruction32(Val, 0, 12);
-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+  unsigned add = fieldFromInstruction(Val, 12, 1);
+  unsigned imm = fieldFromInstruction(Val, 0, 12);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
 
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2053,9 +2066,9 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned U = fieldFromInstruction32(Val, 8, 1);
-  unsigned imm = fieldFromInstruction32(Val, 0, 8);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned U = fieldFromInstruction(Val, 8, 1);
+  unsigned imm = fieldFromInstruction(Val, 0, 8);
 
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2077,11 +2090,11 @@ static DecodeStatus
 DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
                      uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
-                 (fieldFromInstruction32(Insn, 11, 1) << 18) |
-                 (fieldFromInstruction32(Insn, 13, 1) << 17) |
-                 (fieldFromInstruction32(Insn, 16, 6) << 11) |
-                 (fieldFromInstruction32(Insn, 26, 1) << 19);
+  unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
+                 (fieldFromInstruction(Insn, 11, 1) << 18) |
+                 (fieldFromInstruction(Insn, 13, 1) << 17) |
+                 (fieldFromInstruction(Insn, 16, 6) << 11) |
+                 (fieldFromInstruction(Insn, 26, 1) << 19);
   if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
                                 true, 4, Inst, Decoder))
     Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
@@ -2093,12 +2106,12 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2;
 
   if (pred == 0xF) {
     Inst.setOpcode(ARM::BLXi);
-    imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+    imm |= fieldFromInstruction(Insn, 24, 1) << 1;
     if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
                                   true, 4, Inst, Decoder))
     Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
@@ -2119,8 +2132,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned align = fieldFromInstruction32(Val, 4, 2);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned align = fieldFromInstruction(Val, 4, 2);
 
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2136,12 +2149,12 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned wb = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned wb = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
 
   // First output register
   switch (Inst.getOpcode()) {
@@ -2410,12 +2423,12 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned wb = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned wb = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
 
   // Writeback Operand
   switch (Inst.getOpcode()) {
@@ -2681,12 +2694,12 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
-  unsigned size = fieldFromInstruction32(Insn, 6, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
+  unsigned size = fieldFromInstruction(Insn, 6, 2);
 
   align *= (1 << size);
 
@@ -2726,12 +2739,12 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
-  unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
+  unsigned size = 1 << fieldFromInstruction(Insn, 6, 2);
   align *= 2*size;
 
   switch (Inst.getOpcode()) {
@@ -2774,11 +2787,11 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
 
   if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2809,13 +2822,13 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned size = fieldFromInstruction32(Insn, 6, 2);
-  unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned size = fieldFromInstruction(Insn, 6, 2);
+  unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
 
   if (size == 0x3) {
     size = 4;
@@ -2862,14 +2875,14 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned imm = fieldFromInstruction32(Insn, 0, 4);
-  imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
-  imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
-  imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
-  imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
-  unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned imm = fieldFromInstruction(Insn, 0, 4);
+  imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+  imm |= fieldFromInstruction(Insn, 24, 1) << 7;
+  imm |= fieldFromInstruction(Insn, 8, 4) << 8;
+  imm |= fieldFromInstruction(Insn, 5, 1) << 12;
+  unsigned Q = fieldFromInstruction(Insn, 6, 1);
 
   if (Q) {
     if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2907,11 +2920,11 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
                                         uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 18, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 18, 2);
 
   if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2950,13 +2963,13 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
-  unsigned op = fieldFromInstruction32(Insn, 6, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 7, 1) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+  unsigned op = fieldFromInstruction(Insn, 6, 1);
 
   if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2986,8 +2999,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
                                      uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned dst = fieldFromInstruction16(Insn, 8, 3);
-  unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+  unsigned dst = fieldFromInstruction(Insn, 8, 3);
+  unsigned imm = fieldFromInstruction(Insn, 0, 8);
 
   if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3034,8 +3047,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 0, 3);
-  unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+  unsigned Rn = fieldFromInstruction(Val, 0, 3);
+  unsigned Rm = fieldFromInstruction(Val, 3, 3);
 
   if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3049,8 +3062,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 0, 3);
-  unsigned imm = fieldFromInstruction32(Val, 3, 5);
+  unsigned Rn = fieldFromInstruction(Val, 0, 3);
+  unsigned imm = fieldFromInstruction(Val, 3, 5);
 
   if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3081,9 +3094,9 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 6, 4);
-  unsigned Rm = fieldFromInstruction32(Val, 2, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 2);
+  unsigned Rn = fieldFromInstruction(Val, 6, 4);
+  unsigned Rm = fieldFromInstruction(Val, 2, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 2);
 
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3104,13 +3117,13 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
     case ARM::t2PLIs:
       break;
     default: {
-      unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+      unsigned Rt = fieldFromInstruction(Insn, 12, 4);
       if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
     return MCDisassembler::Fail;
     }
   }
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   if (Rn == 0xF) {
     switch (Inst.getOpcode()) {
       case ARM::t2LDRBs:
@@ -3133,16 +3146,16 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
         return MCDisassembler::Fail;
     }
 
-    int imm = fieldFromInstruction32(Insn, 0, 12);
-    if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+    int imm = fieldFromInstruction(Insn, 0, 12);
+    if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1;
     Inst.addOperand(MCOperand::CreateImm(imm));
 
     return S;
   }
 
-  unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
-  addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
-  addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+  unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+  addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+  addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
   if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
     return MCDisassembler::Fail;
 
@@ -3167,8 +3180,8 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 9);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 9);
 
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3182,8 +3195,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                                    uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 8, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 8);
+  unsigned Rn = fieldFromInstruction(Val, 8, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 8);
 
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3210,8 +3223,8 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 9);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 9);
 
   // Some instructions always use an additive offset.
   switch (Inst.getOpcode()) {
@@ -3241,12 +3254,12 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
                                     uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned addr = fieldFromInstruction32(Insn, 0, 8);
-  addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned addr = fieldFromInstruction(Insn, 0, 8);
+  addr |= fieldFromInstruction(Insn, 9, 1) << 8;
   addr |= Rn << 9;
-  unsigned load = fieldFromInstruction32(Insn, 20, 1);
+  unsigned load = fieldFromInstruction(Insn, 20, 1);
 
   if (!load) {
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3271,8 +3284,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 12);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 12);
 
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3284,7 +3297,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
 
 static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+  unsigned imm = fieldFromInstruction(Insn, 0, 7);
 
   Inst.addOperand(MCOperand::CreateReg(ARM::SP));
   Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3298,8 +3311,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
   DecodeStatus S = MCDisassembler::Success;
 
   if (Inst.getOpcode() == ARM::tADDrSP) {
-    unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
-    Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+    unsigned Rdm = fieldFromInstruction(Insn, 0, 3);
+    Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
 
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3307,7 +3320,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
     return MCDisassembler::Fail;
   } else if (Inst.getOpcode() == ARM::tADDspr) {
-    unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+    unsigned Rm = fieldFromInstruction(Insn, 3, 4);
 
     Inst.addOperand(MCOperand::CreateReg(ARM::SP));
     Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3320,8 +3333,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
 
 static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
                            uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
-  unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+  unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
+  unsigned flags = fieldFromInstruction(Insn, 0, 3);
 
   Inst.addOperand(MCOperand::CreateImm(imod));
   Inst.addOperand(MCOperand::CreateImm(flags));
@@ -3332,8 +3345,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
 static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned add = fieldFromInstruction32(Insn, 4, 1);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned add = fieldFromInstruction(Insn, 4, 1);
 
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3380,8 +3393,8 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
                        uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
 
   if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3396,9 +3409,9 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+  unsigned pred = fieldFromInstruction(Insn, 22, 4);
   if (pred == 0xE || pred == 0xF) {
-    unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+    unsigned opc = fieldFromInstruction(Insn, 4, 28);
     switch (opc) {
       default:
         return MCDisassembler::Fail;
@@ -3413,15 +3426,15 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
         break;
     }
 
-    unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+    unsigned imm = fieldFromInstruction(Insn, 0, 4);
     return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
   }
 
-  unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
-  brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
-  brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
-  brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
-  brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+  unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1;
+  brtarget |= fieldFromInstruction(Insn, 11, 1) << 19;
+  brtarget |= fieldFromInstruction(Insn, 13, 1) << 18;
+  brtarget |= fieldFromInstruction(Insn, 16, 6) << 12;
+  brtarget |= fieldFromInstruction(Insn, 26, 1) << 20;
 
   if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3436,10 +3449,10 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
 // a splat operation or a rotation.
 static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
                           uint64_t Address, const void *Decoder) {
-  unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+  unsigned ctrl = fieldFromInstruction(Val, 10, 2);
   if (ctrl == 0) {
-    unsigned byte = fieldFromInstruction32(Val, 8, 2);
-    unsigned imm = fieldFromInstruction32(Val, 0, 8);
+    unsigned byte = fieldFromInstruction(Val, 8, 2);
+    unsigned imm = fieldFromInstruction(Val, 0, 8);
     switch (byte) {
       case 0:
         Inst.addOperand(MCOperand::CreateImm(imm));
@@ -3456,8 +3469,8 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
         break;
     }
   } else {
-    unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
-    unsigned rot = fieldFromInstruction32(Val, 7, 5);
+    unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
+    unsigned rot = fieldFromInstruction(Val, 7, 5);
     unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
     Inst.addOperand(MCOperand::CreateImm(imm));
   }
@@ -3517,9 +3530,9 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
                                         uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
   if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
 
@@ -3540,10 +3553,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
                                          uint64_t Address, const void *Decoder){
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
   if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3567,12 +3580,12 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
 
@@ -3592,13 +3605,13 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
 
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
   if (Rm == 0xF) S = MCDisassembler::SoftFail;
@@ -3620,12 +3633,12 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
 
@@ -3645,12 +3658,12 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  imm |= fieldFromInstruction32(Insn, 16, 4) << 13;
-  imm |= fieldFromInstruction32(Insn, 23, 1) << 12;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  imm |= fieldFromInstruction(Insn, 16, 4) << 13;
+  imm |= fieldFromInstruction(Insn, 23, 1) << 12;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
   if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail;
 
@@ -3670,11 +3683,11 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -3682,22 +3695,22 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 5, 3);
+      index = fieldFromInstruction(Insn, 5, 3);
       break;
     case 1:
-      if (fieldFromInstruction32(Insn, 5, 1))
+      if (fieldFromInstruction(Insn, 5, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 4, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 6, 1))
+      if (fieldFromInstruction(Insn, 6, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 4, 2) != 0)
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 4, 2) != 0)
         align = 4;
   }
 
@@ -3729,11 +3742,11 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -3741,22 +3754,22 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 5, 3);
+      index = fieldFromInstruction(Insn, 5, 3);
       break;
     case 1:
-      if (fieldFromInstruction32(Insn, 5, 1))
+      if (fieldFromInstruction(Insn, 5, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 4, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 6, 1))
+      if (fieldFromInstruction(Insn, 6, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 4, 2) != 0)
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 4, 2) != 0)
         align = 4;
   }
 
@@ -3787,11 +3800,11 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -3800,24 +3813,24 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      index = fieldFromInstruction32(Insn, 5, 3);
-      if (fieldFromInstruction32(Insn, 4, 1))
+      index = fieldFromInstruction(Insn, 5, 3);
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 2;
       break;
     case 1:
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 4, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 4;
-      if (fieldFromInstruction32(Insn, 5, 1))
+      if (fieldFromInstruction(Insn, 5, 1))
         inc = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 5, 1))
+      if (fieldFromInstruction(Insn, 5, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 4, 1) != 0)
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 4, 1) != 0)
         align = 8;
-      if (fieldFromInstruction32(Insn, 6, 1))
+      if (fieldFromInstruction(Insn, 6, 1))
         inc = 2;
       break;
   }
@@ -3854,11 +3867,11 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -3867,24 +3880,24 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      index = fieldFromInstruction32(Insn, 5, 3);
-      if (fieldFromInstruction32(Insn, 4, 1))
+      index = fieldFromInstruction(Insn, 5, 3);
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 2;
       break;
     case 1:
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 4, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 4;
-      if (fieldFromInstruction32(Insn, 5, 1))
+      if (fieldFromInstruction(Insn, 5, 1))
         inc = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 5, 1))
+      if (fieldFromInstruction(Insn, 5, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 4, 1) != 0)
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 4, 1) != 0)
         align = 8;
-      if (fieldFromInstruction32(Insn, 6, 1))
+      if (fieldFromInstruction(Insn, 6, 1))
         inc = 2;
       break;
   }
@@ -3918,11 +3931,11 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -3931,22 +3944,22 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 5, 3);
+      index = fieldFromInstruction(Insn, 5, 3);
       break;
     case 1:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 5, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 5, 1))
         inc = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 4, 2))
+      if (fieldFromInstruction(Insn, 4, 2))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 6, 1))
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 6, 1))
         inc = 2;
       break;
   }
@@ -3988,11 +4001,11 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -4001,22 +4014,22 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 5, 3);
+      index = fieldFromInstruction(Insn, 5, 3);
       break;
     case 1:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 5, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 5, 1))
         inc = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 4, 2))
+      if (fieldFromInstruction(Insn, 4, 2))
         return MCDisassembler::Fail; // UNDEFINED
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 6, 1))
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 6, 1))
         inc = 2;
       break;
   }
@@ -4052,11 +4065,11 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -4065,22 +4078,22 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 4;
-      index = fieldFromInstruction32(Insn, 5, 3);
+      index = fieldFromInstruction(Insn, 5, 3);
       break;
     case 1:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 8;
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 5, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 5, 1))
         inc = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 4, 2))
-        align = 4 << fieldFromInstruction32(Insn, 4, 2);
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 6, 1))
+      if (fieldFromInstruction(Insn, 4, 2))
+        align = 4 << fieldFromInstruction(Insn, 4, 2);
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 6, 1))
         inc = 2;
       break;
   }
@@ -4126,11 +4139,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
                          uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 10, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 10, 2);
 
   unsigned align = 0;
   unsigned index = 0;
@@ -4139,22 +4152,22 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
     default:
       return MCDisassembler::Fail;
     case 0:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 4;
-      index = fieldFromInstruction32(Insn, 5, 3);
+      index = fieldFromInstruction(Insn, 5, 3);
       break;
     case 1:
-      if (fieldFromInstruction32(Insn, 4, 1))
+      if (fieldFromInstruction(Insn, 4, 1))
         align = 8;
-      index = fieldFromInstruction32(Insn, 6, 2);
-      if (fieldFromInstruction32(Insn, 5, 1))
+      index = fieldFromInstruction(Insn, 6, 2);
+      if (fieldFromInstruction(Insn, 5, 1))
         inc = 2;
       break;
     case 2:
-      if (fieldFromInstruction32(Insn, 4, 2))
-        align = 4 << fieldFromInstruction32(Insn, 4, 2);
-      index = fieldFromInstruction32(Insn, 7, 1);
-      if (fieldFromInstruction32(Insn, 6, 1))
+      if (fieldFromInstruction(Insn, 4, 2))
+        align = 4 << fieldFromInstruction(Insn, 4, 2);
+      index = fieldFromInstruction(Insn, 7, 1);
+      if (fieldFromInstruction(Insn, 6, 1))
         inc = 2;
       break;
   }
@@ -4190,11 +4203,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt  = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm  = fieldFromInstruction32(Insn,  5, 1);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+  unsigned Rt  = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm  = fieldFromInstruction(Insn,  5, 1);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
 
   if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
     S = MCDisassembler::SoftFail;
@@ -4216,11 +4229,11 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rt  = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm  = fieldFromInstruction32(Insn,  5, 1);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  Rm |= fieldFromInstruction32(Insn, 0, 4) << 1;
+  unsigned Rt  = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm  = fieldFromInstruction(Insn,  5, 1);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  Rm |= fieldFromInstruction(Insn, 0, 4) << 1;
 
   if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F)
     S = MCDisassembler::SoftFail;
@@ -4242,8 +4255,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
 static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
                              uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned pred = fieldFromInstruction16(Insn, 4, 4);
-  unsigned mask = fieldFromInstruction16(Insn, 0, 4);
+  unsigned pred = fieldFromInstruction(Insn, 4, 4);
+  unsigned mask = fieldFromInstruction(Insn, 0, 4);
 
   if (pred == 0xF) {
     pred = 0xE;
@@ -4265,13 +4278,13 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned addr = fieldFromInstruction32(Insn, 0, 8);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
-  unsigned U = fieldFromInstruction32(Insn, 23, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned addr = fieldFromInstruction(Insn, 0, 8);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
+  unsigned U = fieldFromInstruction(Insn, 23, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
   bool writeback = (W == 1) | (P == 0);
 
   addr |= (U << 8) | (Rn << 9);
@@ -4302,13 +4315,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned addr = fieldFromInstruction32(Insn, 0, 8);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
-  unsigned U = fieldFromInstruction32(Insn, 23, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Insn, 8, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned addr = fieldFromInstruction(Insn, 0, 8);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
+  unsigned U = fieldFromInstruction(Insn, 23, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
   bool writeback = (W == 1) | (P == 0);
 
   addr |= (U << 8) | (Rn << 9);
@@ -4334,13 +4347,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
-  unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
+  unsigned sign1 = fieldFromInstruction(Insn, 21, 1);
+  unsigned sign2 = fieldFromInstruction(Insn, 23, 1);
   if (sign1 != sign2) return MCDisassembler::Fail;
 
-  unsigned Val = fieldFromInstruction32(Insn, 0, 8);
-  Val |= fieldFromInstruction32(Insn, 12, 3) << 8;
-  Val |= fieldFromInstruction32(Insn, 26, 1) << 11;
+  unsigned Val = fieldFromInstruction(Insn, 0, 8);
+  Val |= fieldFromInstruction(Insn, 12, 3) << 8;
+  Val |= fieldFromInstruction(Insn, 26, 1) << 11;
   Val |= sign1 << 12;
   Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val)));
 
@@ -4360,10 +4373,10 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
 
 static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
                                uint64_t Address, const void *Decoder) {
-  unsigned Rt   = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rt2  = fieldFromInstruction32(Insn, 0,  4);
-  unsigned Rn   = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rt   = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rt2  = fieldFromInstruction(Insn, 0,  4);
+  unsigned Rn   = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
   if (pred == 0xF)
     return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -4387,12 +4400,12 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
-  Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
-  unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
-  Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
-  unsigned imm = fieldFromInstruction32(Insn, 16, 6);
-  unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+  unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+  unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+  unsigned imm = fieldFromInstruction(Insn, 16, 6);
+  unsigned cmode = fieldFromInstruction(Insn, 8, 4);
 
   DecodeStatus S = MCDisassembler::Success;
 
@@ -4415,12 +4428,12 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
-  Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
-  unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
-  Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
-  unsigned imm = fieldFromInstruction32(Insn, 16, 6);
-  unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+  unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+  unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+  unsigned imm = fieldFromInstruction(Insn, 16, 6);
+  unsigned cmode = fieldFromInstruction(Insn, 8, 4);
 
   DecodeStatus S = MCDisassembler::Success;
 
@@ -4445,13 +4458,13 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
                                 uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned Rn = fieldFromInstruction32(Val, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Val, 12, 4);
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  Rm |= (fieldFromInstruction32(Val, 23, 1) << 4);
-  unsigned Cond = fieldFromInstruction32(Val, 28, 4);
+  unsigned Rn = fieldFromInstruction(Val, 16, 4);
+  unsigned Rt = fieldFromInstruction(Val, 12, 4);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
+  unsigned Cond = fieldFromInstruction(Val, 28, 4);
  
-  if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt)
+  if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
     S = MCDisassembler::SoftFail;
 
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
@@ -4473,11 +4486,11 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
 
   DecodeStatus S = MCDisassembler::Success;
 
-  unsigned CRm = fieldFromInstruction32(Val, 0, 4);
-  unsigned opc1 = fieldFromInstruction32(Val, 4, 4);
-  unsigned cop = fieldFromInstruction32(Val, 8, 4);
-  unsigned Rt = fieldFromInstruction32(Val, 12, 4);
-  unsigned Rt2 = fieldFromInstruction32(Val, 16, 4);
+  unsigned CRm = fieldFromInstruction(Val, 0, 4);
+  unsigned opc1 = fieldFromInstruction(Val, 4, 4);
+  unsigned cop = fieldFromInstruction(Val, 8, 4);
+  unsigned Rt = fieldFromInstruction(Val, 12, 4);
+  unsigned Rt2 = fieldFromInstruction(Val, 16, 4);
 
   if ((cop & ~0x1) == 0xa)
     return MCDisassembler::Fail;
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 042b456538c3..aa5747209b79 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -16,6 +16,7 @@
 #include "MipsRegisterInfo.h"
 #include "llvm/MC/EDInstInfo.h"
 #include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -274,7 +275,8 @@ MipsDisassembler::getInstruction(MCInst &instr,
     return MCDisassembler::Fail;
 
   // Calling the auto-generated decoder function.
-  Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
+  Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+                             this, STI);
   if (Result != MCDisassembler::Fail) {
     Size = 4;
     return Result;
@@ -298,13 +300,15 @@ Mips64Disassembler::getInstruction(MCInst &instr,
     return MCDisassembler::Fail;
 
   // Calling the auto-generated decoder function.
-  Result = decodeMips64Instruction32(instr, Insn, Address, this, STI);
+  Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address,
+                             this, STI);
   if (Result != MCDisassembler::Fail) {
     Size = 4;
     return Result;
   }
   // If we fail to decode in Mips64 decoder space we can try in Mips32
-  Result = decodeMipsInstruction32(instr, Insn, Address, this, STI);
+  Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
+                             this, STI);
   if (Result != MCDisassembler::Fail) {
     Size = 4;
     return Result;
@@ -379,8 +383,8 @@ static DecodeStatus DecodeMem(MCInst &Inst,
                               uint64_t Address,
                               const void *Decoder) {
   int Offset = SignExtend32<16>(Insn & 0xffff);
-  unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
-  unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+  unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+  unsigned Base = fieldFromInstruction(Insn, 21, 5);
 
   Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg);
   Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@@ -401,8 +405,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
                                uint64_t Address,
                                const void *Decoder) {
   int Offset = SignExtend32<16>(Insn & 0xffff);
-  unsigned Reg = fieldFromInstruction32(Insn, 16, 5);
-  unsigned Base = fieldFromInstruction32(Insn, 21, 5);
+  unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+  unsigned Base = fieldFromInstruction(Insn, 21, 5);
 
   Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg);
   Base = getReg(Decoder, Mips::CPURegsRegClassID, Base);
@@ -484,7 +488,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
                                      uint64_t Address,
                                      const void *Decoder) {
 
-  unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2;
+  unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
   Inst.addOperand(MCOperand::CreateImm(JumpOffset));
   return MCDisassembler::Success;
 }
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 8548ae0b8b53..90f7942c5b4e 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -44,6 +44,8 @@ def FeatureN64         : SubtargetFeature<"n64", "MipsABI", "N64",
                                 "Enable n64 ABI">;
 def FeatureEABI        : SubtargetFeature<"eabi", "MipsABI", "EABI",
                                 "Enable eabi ABI">;
+def FeatureAndroid     : SubtargetFeature<"android", "IsAndroid", "true",
+                                "Target is android">;
 def FeatureVFPU        : SubtargetFeature<"vfpu", "HasVFPU",
                                 "true", "Enable vector FPU instructions.">;
 def FeatureSEInReg     : SubtargetFeature<"seinreg", "HasSEInReg", "true",
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 8aadefdcd141..19213fa67305 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -145,6 +145,17 @@ def RetCC_MipsEABI : CallingConv<[
 ]>;
 
 //===----------------------------------------------------------------------===//
+// Mips Android Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_MipsAndroid : CallingConv<[
+  // f32 are returned in registers F0, F2, F1, F3
+  CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>,
+
+  CCDelegateTo<RetCC_MipsO32>
+]>;
+
+//===----------------------------------------------------------------------===//
 // Mips FastCC Calling Convention
 //===----------------------------------------------------------------------===//
 def CC_MipsO32_FastCC : CallingConv<[
@@ -210,6 +221,7 @@ def RetCC_Mips : CallingConv<[
   CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>,
   CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>,
   CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
+  CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>,
   CCDelegateTo<RetCC_MipsO32>
 ]>;
 
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 3215c44be0ef..ba15362f07b0 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -89,6 +89,9 @@ protected:
   // InMips16 -- can process Mips16 instructions
   bool InMips16Mode;
 
+  // IsAndroid -- target is android
+  bool IsAndroid;
+
   InstrItineraryData InstrItins;
 
 public:
@@ -128,6 +131,7 @@ public:
   bool isNotSingleFloat() const { return !IsSingleFloat; }
   bool hasVFPU() const { return HasVFPU; }
   bool inMips16Mode() const { return InMips16Mode; }
+  bool isAndroid() const { return IsAndroid; }
   bool isLinux() const { return IsLinux; }
 
   bool hasStandardEncoding() const { return !inMips16Mode(); }
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index aa819eeb30a2..61d44c52d438 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -106,7 +106,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
 
-  // We do not currently implment this libm ops for PowerPC.
+  // We do not currently implement these libm ops for PowerPC.
   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6c1a816c9ff2..18e6b7c3d9b6 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -17,14 +17,14 @@
 include "llvm/Target/Target.td"
 
 //===----------------------------------------------------------------------===//
-// X86 Subtarget state.
+// X86 Subtarget state
 //
 
 def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
                                   "64-bit mode (x86_64)">;
 
 //===----------------------------------------------------------------------===//
-// X86 Subtarget features.
+// X86 Subtarget features
 //===----------------------------------------------------------------------===//
 
 def FeatureCMOV    : SubtargetFeature<"cmov","HasCMov", "true",
@@ -97,7 +97,7 @@ def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
                                       [FeatureAVX, FeatureSSE4A]>;
 def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
                                       "Enable XOP instructions",
-                                      [FeatureAVX, FeatureSSE4A]>;
+                                      [FeatureFMA4]>;
 def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
                                           "HasVectorUAMem", "true",
                  "Allow unaligned memory operands on vector/SIMD instructions">;
@@ -226,7 +226,7 @@ def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
 def : Proc<"bdver2",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAES, FeaturePCLMUL,
                                FeatureF16C, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureBMI]>;
+                               FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
 
 def : Proc<"winchip-c6",      [FeatureMMX]>;
 def : Proc<"winchip2",        [Feature3DNow]>;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ea66a6115d7e..c77355f91796 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5114,6 +5114,82 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT, because the current ISD::FP_EXTEND has
+// the constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check whether the pattern can be translated into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Skip if the element is undefined.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if one of the elements is not defined from 'fpext'.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Check how the source of 'fpext' is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Check the original type
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
+      return SDValue();
+
+    // Check whether the value being 'fpext'ed is extracted from the same
+    // source.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Fill the remaining mask as undef.
+  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+    Mask.push_back(-1);
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getVectorShuffle(VecInVT, DL,
+                                          VecIn, DAG.getUNDEF(VecInVT),
+                                          &Mask[0]));
+}
+
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -5146,6 +5222,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (Broadcast.getNode())
     return Broadcast;
 
+  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+  if (FpExt.getNode())
+    return FpExt;
+
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero  = 0;
@@ -11122,9 +11202,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
   Results.push_back(Swap.getValue(1));
 }
 
-void X86TargetLowering::
+static void
 ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
-                        SelectionDAG &DAG, unsigned NewOp) const {
+                        SelectionDAG &DAG, unsigned NewOp) {
   DebugLoc dl = Node->getDebugLoc();
   assert (Node->getValueType(0) == MVT::i64 &&
           "Only know how to expand i64 atomics");
@@ -11245,26 +11325,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::ATOMIC_LOAD_ADD:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_AND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_NAND:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_OR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_SUB:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
-    return;
   case ISD::ATOMIC_LOAD_XOR:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
-    return;
-  case ISD::ATOMIC_SWAP:
-    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+  case ISD::ATOMIC_SWAP: {
+    unsigned Opc;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected opcode");
+    case ISD::ATOMIC_LOAD_ADD:
+      Opc = X86ISD::ATOMADD64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_AND:
+      Opc = X86ISD::ATOMAND64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_NAND:
+      Opc = X86ISD::ATOMNAND64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_OR:
+      Opc = X86ISD::ATOMOR64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_SUB:
+      Opc = X86ISD::ATOMSUB64_DAG;
+      break;
+    case ISD::ATOMIC_LOAD_XOR:
+      Opc = X86ISD::ATOMXOR64_DAG;
+      break;
+    case ISD::ATOMIC_SWAP:
+      Opc = X86ISD::ATOMSWAP64_DAG;
+      break;
+    }
+    ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
     return;
+  }
   case ISD::ATOMIC_LOAD:
     ReplaceATOMIC_LOAD(N, Results, DAG);
   }
@@ -11342,7 +11436,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ATOMAND64_DAG:      return "X86ISD::ATOMAND64_DAG";
   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
+  case X86ISD::VSEXT_MOVL:         return "X86ISD::VSEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
@@ -12792,16 +12888,31 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     // String/text processing lowering.
   case X86::PCMPISTRM128REG:
   case X86::VPCMPISTRM128REG:
-    return EmitPCMP(MI, BB, 3, false /* in-mem */);
   case X86::PCMPISTRM128MEM:
   case X86::VPCMPISTRM128MEM:
-    return EmitPCMP(MI, BB, 3, true /* in-mem */);
   case X86::PCMPESTRM128REG:
   case X86::VPCMPESTRM128REG:
-    return EmitPCMP(MI, BB, 5, false /* in mem */);
   case X86::PCMPESTRM128MEM:
-  case X86::VPCMPESTRM128MEM:
-    return EmitPCMP(MI, BB, 5, true /* in mem */);
+  case X86::VPCMPESTRM128MEM: {
+    unsigned NumArgs;
+    bool MemArg;
+    switch (MI->getOpcode()) {
+    default: llvm_unreachable("illegal opcode!");
+    case X86::PCMPISTRM128REG:
+    case X86::VPCMPISTRM128REG:
+      NumArgs = 3; MemArg = false; break;
+    case X86::PCMPISTRM128MEM:
+    case X86::VPCMPISTRM128MEM:
+      NumArgs = 3; MemArg = true; break;
+    case X86::PCMPESTRM128REG:
+    case X86::VPCMPESTRM128REG:
+      NumArgs = 5; MemArg = false; break;
+    case X86::PCMPESTRM128MEM:
+    case X86::VPCMPESTRM128MEM:
+      NumArgs = 5; MemArg = true; break;
+    }
+    return EmitPCMP(MI, BB, NumArgs, MemArg);
+  }
 
     // Thread synchronization.
   case X86::MONITOR:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9123ebd8ae49..896d067fda75 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -227,6 +227,9 @@ namespace llvm {
       // VSEXT_MOVL - Vector move low and sign extend.
       VSEXT_MOVL,
 
+      // VFPEXT - Vector FP extend.
+      VFPEXT,
+
       // VSHL, VSRL - 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
 
@@ -828,6 +831,8 @@ namespace llvm {
     SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
     SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,
@@ -859,9 +864,6 @@ namespace llvm {
                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                    LLVMContext &Context) const;
 
-    void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                 SelectionDAG &DAG, unsigned NewOp) const;
-
     /// Utility function to emit string processing sse4.2 instructions
     /// that return in xmm0.
     /// This takes the instruction to expand, the associated machine basic
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index d13167bb05db..1db68c86b76d 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -81,6 +81,11 @@ def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
 
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e4c35b9bc556..20dc81eb4a37 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2101,12 +2101,20 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
 
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (VCVTPS2PDrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
             (VCVTPS2PDYrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
             (VCVTPS2PDYrm addr:$src)>;
 }
 
+let Predicates = [HasSSE2] in {
+  // Match fextend for 128-bit conversions
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (CVTPS2PDrr VR128:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
 //===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d34fab103fa3..cbe1ca4ddcec 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
   // if the size is something we can handle with a single primitive load/store.
   // A single load+store correctly handles overlapping memory in the memmove
   // case.
-  unsigned Size = MemOpLength->getZExtValue();
-  if (Size == 0) return MI;  // Delete this mem transfer.
+  uint64_t Size = MemOpLength->getLimitedValue();
+  assert(Size && "0-sized memory transfering should be removed already.");
 
   if (Size > 8 || (Size&(Size-1)))
     return 0;  // If not 1/2/4/8 bytes, exit.
@@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
   ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
   if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
     return 0;
-  uint64_t Len = LenC->getZExtValue();
+  uint64_t Len = LenC->getLimitedValue();
   Alignment = MI->getAlignment();
-
-  // If the length is zero, this is a no-op
-  if (Len == 0) return MI; // memset(d,c,0,a) -> noop
+  assert(Len && "0-sized memory setting should be removed already.");
 
   // memset(s,c,n) -> store s, c (for n=1,2,4,8)
   if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index bf35eacd69fa..17b83ceee194 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -86,6 +86,9 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
 static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics",
        cl::desc("instrument atomic instructions (rmw, cmpxchg)"),
        cl::Hidden, cl::init(true));
+static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path",
+       cl::desc("use instrumentation with slow path for all accesses"),
+       cl::Hidden, cl::init(false));
 // This flag limits the number of instructions to be instrumented
 // in any given BB. Normally, this should be set to unlimited (INT_MAX),
 // but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary
@@ -159,7 +162,7 @@ struct AddressSanitizer : public ModulePass {
                          Value *Addr, uint32_t TypeSize, bool IsWrite);
   Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
                            Value *ShadowValue, uint32_t TypeSize);
-  Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC,
+  Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
                                  bool IsWrite, size_t AccessSizeIndex);
   bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
   void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
@@ -251,24 +254,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
 //     ThenBlock
 //   Tail
 //
-// If ThenBlock is zero, a new block is created and its terminator is returned.
-// Otherwize 0 is returned.
-static BranchInst *splitBlockAndInsertIfThen(Value *Cmp,
-                                             BasicBlock *ThenBlock = 0) {
+// ThenBlock is created and its terminator is returned.
+// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise
+// it is terminated with BranchInst to Tail.
+static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) {
   Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
   BasicBlock *Head = SplitBefore->getParent();
   BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
   TerminatorInst *HeadOldTerm = Head->getTerminator();
-  BranchInst *CheckTerm = 0;
-  if (!ThenBlock) {
-    LLVMContext &C = Head->getParent()->getParent()->getContext();
-    ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+  LLVMContext &C = Head->getParent()->getParent()->getContext();
+  BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+  TerminatorInst *CheckTerm;
+  if (Unreachable)
+    CheckTerm = new UnreachableInst(C, ThenBlock);
+  else
     CheckTerm = BranchInst::Create(Tail, ThenBlock);
-  }
   BranchInst *HeadNewTerm =
     BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
   ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
-
   return CheckTerm;
 }
 
@@ -320,7 +323,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
 
     Value *Cmp = IRB.CreateICmpNE(Length,
                                   Constant::getNullValue(Length->getType()));
-    InsertBefore = splitBlockAndInsertIfThen(Cmp);
+    InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
   }
 
   instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
@@ -391,15 +394,11 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) {
 }
 
 Instruction *AddressSanitizer::generateCrashCode(
-    BasicBlock *BB, Value *Addr, Value *PC,
+    Instruction *InsertBefore, Value *Addr,
     bool IsWrite, size_t AccessSizeIndex) {
-  IRBuilder<> IRB(BB->getFirstNonPHI());
-  CallInst *Call;
-  if (PC)
-    Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex],
-                           Addr, PC);
-  else
-    Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr);
+  IRBuilder<> IRB(InsertBefore);
+  CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex],
+                                  Addr);
   // We don't do Call->setDoesNotReturn() because the BB already has
   // UnreachableInst at the end.
   // This EmptyAsm is required to avoid callback merge.
@@ -420,7 +419,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
         LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
   // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
   LastAccessedByte = IRB.CreateIntCast(
-      LastAccessedByte, IRB.getInt8Ty(), false);
+      LastAccessedByte, ShadowValue->getType(), false);
   // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
   return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
 }
@@ -440,26 +439,27 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
       IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
 
   Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
-
-  BasicBlock *CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F);
-  new UnreachableInst(*C, CrashBlock);
   size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
-  Instruction *Crash =
-      generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex);
-  Crash->setDebugLoc(OrigIns->getDebugLoc());
-
   size_t Granularity = 1 << MappingScale;
-  if (TypeSize < 8 * Granularity) {
-    BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp);
-    assert(CheckTerm->isUnconditional());
+  TerminatorInst *CrashTerm = 0;
+
+  if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+    TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false);
+    assert(cast<BranchInst>(CheckTerm)->isUnconditional());
     BasicBlock *NextBB = CheckTerm->getSuccessor(0);
     IRB.SetInsertPoint(CheckTerm);
     Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+    BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+    CrashTerm = new UnreachableInst(*C, CrashBlock);
     BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
     ReplaceInstWithInst(CheckTerm, NewTerm);
   } else {
-    splitBlockAndInsertIfThen(Cmp, CrashBlock);
+    CrashTerm = splitBlockAndInsertIfThen(Cmp, true);
   }
+
+  Instruction *Crash =
+      generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex);
+  Crash->setDebugLoc(OrigIns->getDebugLoc());
 }
 
 // This function replaces all global variables with new variables that have
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index bc87106b3d21..a3c426a714e0 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts(
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
   cl::desc("Disable branch optimizations in CodeGenPrepare"));
 
-// FIXME: Remove this abomination once all of the tests pass without it!
-static cl::opt<bool> DisableDeleteDeadBlocks(
-  "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
-  cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
-
 static cl::opt<bool> DisableSelectToBranch(
   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
   cl::desc("Disable select to branch conversion."));
@@ -188,10 +183,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
           WorkList.insert(*II);
     }
 
-    if (!DisableDeleteDeadBlocks)
-      for (SmallPtrSet<BasicBlock*, 8>::iterator
-             I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
-        DeleteDeadBlock(*I);
+    for (SmallPtrSet<BasicBlock*, 8>::iterator
+           I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
+      DeleteDeadBlock(*I);
 
     // Merge pairs of basic blocks with unconditional branches, connected by
     // a single edge.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 120175d5f7f2..4822fd09448c 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -613,8 +613,8 @@ namespace {
     void verifyRemoved(const Instruction *I) const;
     bool splitCriticalEdges();
     unsigned replaceAllDominatedUsesWith(Value *From, Value *To,
-                                         const BasicBlock *Root);
-    bool propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root);
+                                         const BasicBlockEdge &Root);
+    bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root);
   };
 
   char GVN::ID = 0;
@@ -2004,22 +2004,13 @@ Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
 /// use is dominated by the given basic block.  Returns the number of uses that
 /// were replaced.
 unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
-                                          const BasicBlock *Root) {
+                                          const BasicBlockEdge &Root) {
   unsigned Count = 0;
   for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
        UI != UE; ) {
     Use &U = (UI++).getUse();
 
-    // If From occurs as a phi node operand then the use implicitly lives in the
-    // corresponding incoming block.  Otherwise it is the block containing the
-    // user that must be dominated by Root.
-    BasicBlock *UsingBlock;
-    if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
-      UsingBlock = PN->getIncomingBlock(U);
-    else
-      UsingBlock = cast<Instruction>(U.getUser())->getParent();
-
-    if (DT->dominates(Root, UsingBlock)) {
+    if (DT->dominates(Root, U)) {
       U.set(To);
       ++Count;
     }
@@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
   return Count;
 }
 
+/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'.  Return
+/// true if every path from the entry block to 'Dst' passes via this edge.  In
+/// particular 'Dst' must not be reachable via another edge from 'Src'.
+static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
+                                       DominatorTree *DT) {
+  // While in theory it is interesting to consider the case in which Dst has
+  // more than one predecessor, because Dst might be part of a loop which is
+  // only reachable from Src, in practice it is pointless since at the time
+  // GVN runs all such loops have preheaders, which means that Dst will have
+  // been changed to have only one predecessor, namely Src.
+  const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+  const BasicBlock *Src = E.getStart();
+  assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+  (void)Src;
+  return Pred != 0;
+}
+
 /// propagateEquality - The given values are known to be equal in every block
 /// dominated by 'Root'.  Exploit this, for example by replacing 'LHS' with
 /// 'RHS' everywhere in the scope.  Returns whether a change was made.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
+bool GVN::propagateEquality(Value *LHS, Value *RHS,
+                            const BasicBlockEdge &Root) {
   SmallVector<std::pair<Value*, Value*>, 4> Worklist;
   Worklist.push_back(std::make_pair(LHS, RHS));
   bool Changed = false;
+  // For speed, compute a conservative fast approximation to
+  // DT->dominates(Root, Root.getEnd());
+  bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT);
 
   while (!Worklist.empty()) {
     std::pair<Value*, Value*> Item = Worklist.pop_back_val();
@@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
         LVN = RVN;
       }
     }
-    assert((!isa<Instruction>(RHS) ||
-            DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
-           "Instruction doesn't dominate scope!");
 
     // If value numbering later sees that an instruction in the scope is equal
     // to 'LHS' then ensure it will be turned into 'RHS'.  In order to preserve
@@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
     // if RHS is an instruction (if an instruction in the scope is morphed into
     // LHS then it will be turned into RHS by the next GVN iteration anyway, so
     // using the leader table is about compiling faster, not optimizing better).
-    if (!isa<Instruction>(RHS))
-      addToLeaderTable(LVN, RHS, Root);
+    // The leader table only tracks basic blocks, not edges. Only add to it if
+    // we have the simple case where the edge dominates the end.
+    if (RootDominatesEnd && !isa<Instruction>(RHS))
+      addToLeaderTable(LVN, RHS, Root.getEnd());
 
     // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.  As
     // LHS always has at least one use that is not dominated by Root, this will
@@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
       // If the number we were assigned was brand new then there is no point in
       // looking for an instruction realizing it: there cannot be one!
       if (Num < NextNum) {
-        Value *NotCmp = findLeader(Root, Num);
+        Value *NotCmp = findLeader(Root.getEnd(), Num);
         if (NotCmp && isa<Instruction>(NotCmp)) {
           unsigned NumReplacements =
             replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
@@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
       }
       // Ensure that any instruction in scope that gets the "A < B" value number
       // is replaced with false.
-      addToLeaderTable(Num, NotVal, Root);
+      // The leader table only tracks basic blocks, not edges. Only add to it if
+      // we have the simple case where the edge dominates the end.
+      if (RootDominatesEnd)
+        addToLeaderTable(Num, NotVal, Root.getEnd());
 
       continue;
     }
@@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlock *Root) {
   return Changed;
 }
 
-/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'.  Return
-/// true if every path from the entry block to 'Dst' passes via this edge.  In
-/// particular 'Dst' must not be reachable via another edge from 'Src'.
-static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
-                                       DominatorTree *DT) {
-  // While in theory it is interesting to consider the case in which Dst has
-  // more than one predecessor, because Dst might be part of a loop which is
-  // only reachable from Src, in practice it is pointless since at the time
-  // GVN runs all such loops have preheaders, which means that Dst will have
-  // been changed to have only one predecessor, namely Src.
-  BasicBlock *Pred = Dst->getSinglePredecessor();
-  assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
-  (void)Src;
-  return Pred != 0;
-}
-
 /// processInstruction - When calculating availability, handle an instruction
 /// by inserting it into the appropriate sets
 bool GVN::processInstruction(Instruction *I) {
@@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) {
 
     BasicBlock *TrueSucc = BI->getSuccessor(0);
     BasicBlock *FalseSucc = BI->getSuccessor(1);
+    // Avoid multiple edges early.
+    if (TrueSucc == FalseSucc)
+      return false;
+
     BasicBlock *Parent = BI->getParent();
     bool Changed = false;
 
-    if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT))
-      Changed |= propagateEquality(BranchCond,
-                                   ConstantInt::getTrue(TrueSucc->getContext()),
-                                   TrueSucc);
+    Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext());
+    BasicBlockEdge TrueE(Parent, TrueSucc);
+    Changed |= propagateEquality(BranchCond, TrueVal, TrueE);
 
-    if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT))
-      Changed |= propagateEquality(BranchCond,
-                                   ConstantInt::getFalse(FalseSucc->getContext()),
-                                   FalseSucc);
+    Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext());
+    BasicBlockEdge FalseE(Parent, FalseSucc);
+    Changed |= propagateEquality(BranchCond, FalseVal, FalseE);
 
     return Changed;
   }
@@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) {
     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
          i != e; ++i) {
       BasicBlock *Dst = i.getCaseSuccessor();
-      if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
-        Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
+      BasicBlockEdge E(Parent, Dst);
+      if (E.isSingleEdge())
+        Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
     }
     return Changed;
   }
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 682d928e4da3..60bdeac16b36 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -39,20 +39,17 @@ static cl::opt<bool,true>
 VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
                cl::desc("Verify dominator info (time consuming)"));
 
-namespace llvm {
-  class BasicBlockEdge {
-    const BasicBlock *Start;
-    const BasicBlock *End;
-  public:
-    BasicBlockEdge(const BasicBlock *Start_, const BasicBlock *End_) :
-      Start(Start_), End(End_) { }
-    const BasicBlock *getStart() const {
-      return Start;
-    }
-    const BasicBlock *getEnd() const {
-      return End;
-    }
-  };
+bool BasicBlockEdge::isSingleEdge() const {
+  const TerminatorInst *TI = Start->getTerminator();
+  unsigned NumEdgesToEnd = 0;
+  for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) {
+    if (TI->getSuccessor(i) == End)
+      ++NumEdgesToEnd;
+    if (NumEdgesToEnd >= 2)
+      return false;
+  }
+  assert(NumEdgesToEnd == 1);
+  return true;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 685124628198..38914b3fe7ec 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1093,7 +1093,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) {
 
   // BitCast implies a no-op cast of type only. No bits change.
   // However, you can't cast pointers to anything but pointers.
-  Assert1(DestTy->isPointerTy() == DestTy->isPointerTy(),
+  Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
           "Bitcast requires both operands to be pointer or neither", &I);
   Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
 
diff --git a/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll b/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll
index eee4ec4333c9..3f04e2e21c46 100644
--- a/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll
+++ b/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll
@@ -16,7 +16,7 @@
 ; CHECK: for.body:
 ; CHECK: %inc.9 = add i8 %inc.8, 1
 ; CHECK: %0 = add i8 %inc1, 10
-; CHEKC: br label %for.cond
+; CHECK: br label %for.cond
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 define void @func() noreturn nounwind uwtable ssp {
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index 2faa04af8bac..e84ce0e2394d 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
 
 ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
 ; rdar://9133587
@@ -21,12 +21,6 @@ for.body:                                         ; preds = %_Z14printIsNotZeroi
   %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0
   %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1
   %inc = add i32 %i.022, 1
-  br i1 %tmp3, label %_Z14printIsNotZeroi.exit, label %if.then.i
-
-if.then.i:                                        ; preds = %for.body
-  unreachable
-
-_Z14printIsNotZeroi.exit:                         ; preds = %for.body
   %tmp8 = load i32* %x, align 4, !tbaa !0
   %tmp11 = load i32* %y, align 4, !tbaa !0
   %mul = mul nsw i32 %tmp11, %tmp8
@@ -37,7 +31,7 @@ if.then.i16:                                      ; preds = %_Z14printIsNotZeroi
   unreachable
 
 _Z14printIsNotZeroi.exit17:                       ; preds = %_Z14printIsNotZeroi.exit
-  br i1 undef, label %_Z14printIsNotZeroi.exit17.for.body_crit_edge, label %for.end
+  br label %_Z14printIsNotZeroi.exit17.for.body_crit_edge
 
 _Z14printIsNotZeroi.exit17.for.body_crit_edge:    ; preds = %_Z14printIsNotZeroi.exit17
   %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index 6fbae199aaed..89c01d58c398 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -33,16 +33,16 @@ define void @test_cos(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}cosf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}cosf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}cosf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}cosf
 
 ; CHECK:      vstmia  {{.*}}
@@ -64,16 +64,16 @@ define void @test_exp(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}expf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}expf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}expf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}expf
 
 ; CHECK:      vstmia  {{.*}}
@@ -95,16 +95,16 @@ define void @test_exp2(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}exp2f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}exp2f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}exp2f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}exp2f
 
 ; CHECK:      vstmia  {{.*}}
@@ -126,16 +126,16 @@ define void @test_log10(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log10f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log10f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log10f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log10f
 
 ; CHECK:      vstmia  {{.*}}
@@ -157,16 +157,16 @@ define void @test_log(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}logf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}logf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}logf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}logf
 
 ; CHECK:      vstmia  {{.*}}
@@ -188,16 +188,16 @@ define void @test_log2(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log2f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log2f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log2f
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}log2f
 
 ; CHECK:      vstmia  {{.*}}
@@ -220,16 +220,16 @@ define void @test_pow(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}powf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}powf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}powf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}powf
 
 ; CHECK:      vstmia  {{.*}}
@@ -277,16 +277,16 @@ define void @test_sin(<4 x float>* %X) nounwind {
 ; CHECK:      movt  [[reg0]], :upper16:{{.*}}
 ; CHECK:      vldmia r{{[0-9][0-9]?}}, {{.*}}
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}sinf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}sinf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}sinf
 
-; CHECK:      {{[v]?mov}}  r0, {{[r|s][0-9]+}}
+; CHECK:      {{[mov|vmov.32]}}  r0,
 ; CHECK:      bl  {{.*}}sinf
 
 ; CHECK:      vstmia  {{.*}}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index edc805a47d6a..b6c9098613fe 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -2,6 +2,8 @@
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
 
 define i32 @t0(i1 zeroext %a) nounwind {
   %1 = zext i1 %a to i32
@@ -221,3 +223,67 @@ entry:
 }
 
 declare i32 @CallVariadic(i32, ...)
+
+; Test fastcc
+
+define fastcc void @fast_callee(float %i) ssp {
+entry:
+; ARM: fast_callee
+; ARM: vmov r0, s0
+; THUMB: fast_callee
+; THUMB: vmov r0, s0
+; ARM-NOVFP: fast_callee
+; ARM-NOVFP-NOT: s0
+; THUMB-NOVFP: fast_callee
+; THUMB-NOVFP-NOT: s0
+  call void @print(float %i)
+  ret void
+}
+
+define void @fast_caller() ssp {
+entry:
+; ARM: fast_caller
+; ARM: vldr s0,
+; THUMB: fast_caller
+; THUMB: vldr s0,
+; ARM-NOVFP: fast_caller
+; ARM-NOVFP: movw r0, #13107
+; ARM-NOVFP: movt r0, #16611
+; THUMB-NOVFP: fast_caller
+; THUMB-NOVFP: movw r0, #13107
+; THUMB-NOVFP: movt r0, #16611
+  call fastcc void @fast_callee(float 0x401C666660000000)
+  ret void
+}
+
+define void @no_fast_callee(float %i) ssp {
+entry:
+; ARM: no_fast_callee
+; ARM: vmov s0, r0
+; THUMB: no_fast_callee
+; THUMB: vmov s0, r0
+; ARM-NOVFP: no_fast_callee
+; ARM-NOVFP-NOT: s0
+; THUMB-NOVFP: no_fast_callee
+; THUMB-NOVFP-NOT: s0
+  call void @print(float %i)
+  ret void
+}
+
+define void @no_fast_caller() ssp {
+entry:
+; ARM: no_fast_caller
+; ARM: vmov r0, s0
+; THUMB: no_fast_caller
+; THUMB: vmov r0, s0
+; ARM-NOVFP: no_fast_caller
+; ARM-NOVFP: movw r0, #13107
+; ARM-NOVFP: movt r0, #16611
+; THUMB-NOVFP: no_fast_caller
+; THUMB-NOVFP: movw r0, #13107
+; THUMB-NOVFP: movt r0, #16611
+  call void @no_fast_callee(float 0x401C666660000000)
+  ret void
+}
+
+declare void @print(float)
diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll
index c5583b94befd..1261ea502129 100644
--- a/test/CodeGen/ARM/fp16.ll
+++ b/test/CodeGen/ARM/fp16.ll
@@ -15,14 +15,14 @@ entry:
   %1 = load i16* @y, align 2
   %2 = tail call float @llvm.convert.from.fp16(i16 %0)
 ; CHECK: __gnu_h2f_ieee
-; CHECK-FP16: vcvtb.f16.f32
+; CHECK-FP16: vcvtb.f32.f16
   %3 = tail call float @llvm.convert.from.fp16(i16 %1)
 ; CHECK: __gnu_h2f_ieee
-; CHECK-FP16: vcvtb.f16.f32
+; CHECK-FP16: vcvtb.f32.f16
   %4 = fadd float %2, %3
   %5 = tail call i16 @llvm.convert.to.fp16(float %4)
 ; CHECK: __gnu_f2h_ieee
-; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f16.f32
   store i16 %5, i16* @x, align 2
   ret void
 }
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 418d4f31ee2b..557556662892 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -76,12 +76,11 @@ define double @f7(double %a, double %b) {
 ; block generated, odds are good that we have close to the ideal code for this:
 ;
 ; CHECK-NEON:      _f8:
+; CHECK-NEON:      movw    [[R3:r[0-9]+]], #1123
 ; CHECK-NEON:      adr     [[R2:r[0-9]+]], LCPI7_0
-; CHECK-NEON-NEXT: movw    [[R3:r[0-9]+]], #1123
-; CHECK-NEON-NEXT: adds    {{r.*}}, [[R2]], #4
 ; CHECK-NEON-NEXT: cmp     r0, [[R3]]
-; CHECK-NEON-NEXT: it      ne
-; CHECK-NEON-NEXT: movne   {{r.*}}, [[R2]]
+; CHECK-NEON-NEXT: it      eq
+; CHECK-NEON-NEXT: addeq.w {{r.*}}, [[R2]]
 ; CHECK-NEON-NEXT: ldr
 ; CHECK-NEON:      bx
 
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index ca2e18a63949..26f7cb68901f 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -4,13 +4,13 @@
 
 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; ARM: t1:
-; ARM: sub r0, r1, #-2147483647
-; ARM: movgt r0, r1
+; ARM: suble r1, r1, #-2147483647
+; ARM: mov r0, r1
 
 ; T2: t1:
 ; T2: mvn r0, #-2147483648
-; T2: add r0, r1
-; T2: movgt r0, r1
+; T2: addle.w r1, r1
+; T2: mov r0, r1
   %tmp1 = icmp sgt i32 %c, 10
   %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
   %tmp3 = add i32 %tmp2, %b
@@ -19,12 +19,12 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; ARM: t2:
-; ARM: sub r0, r1, #10
-; ARM: movgt r0, r1
+; ARM: suble r1, r1, #10
+; ARM: mov r0, r1
 
 ; T2: t2:
-; T2: sub.w r0, r1, #10
-; T2: movgt r0, r1
+; T2: suble.w r1, r1, #10
+; T2: mov r0, r1
   %tmp1 = icmp sgt i32 %c, 10
   %tmp2 = select i1 %tmp1, i32 0, i32 10
   %tmp3 = sub i32 %b, %tmp2
@@ -104,3 +104,78 @@ entry:
   ret i32 %tmp3
 }
 
+; Fold ORRri into movcc.
+define i32 @t8(i32 %a, i32 %b) nounwind {
+; ARM: t8:
+; ARM: cmp r0, r1
+; ARM: orrge r0, r1, #1
+
+; T2: t8:
+; T2: cmp r0, r1
+; T2: orrge r0, r1, #1
+  %x = or i32 %b, 1
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %a, i32 %x
+  ret i32 %tmp1
+}
+
+; Fold ANDrr into movcc.
+define i32 @t9(i32 %a, i32 %b, i32 %c) nounwind {
+; ARM: t9:
+; ARM: cmp r0, r1
+; ARM: andge r0, r1, r2
+
+; T2: t9:
+; T2: cmp r0, r1
+; T2: andge.w r0, r1, r2
+  %x = and i32 %b, %c
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %a, i32 %x
+  ret i32 %tmp1
+}
+
+; Fold EORrs into movcc.
+define i32 @t10(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; ARM: t10:
+; ARM: cmp r0, r1
+; ARM: eorge r0, r1, r2, lsl #7
+
+; T2: t10:
+; T2: cmp r0, r1
+; T2: eorge.w r0, r1, r2, lsl #7
+  %s = shl i32 %c, 7
+  %x = xor i32 %b, %s
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %a, i32 %x
+  ret i32 %tmp1
+}
+
+; Fold ORRri into movcc, reversing the condition.
+define i32 @t11(i32 %a, i32 %b) nounwind {
+; ARM: t11:
+; ARM: cmp r0, r1
+; ARM: orrlt r0, r1, #1
+
+; T2: t11:
+; T2: cmp r0, r1
+; T2: orrlt r0, r1, #1
+  %x = or i32 %b, 1
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %x, i32 %a
+  ret i32 %tmp1
+}
+
+; Fold ADDri12 into movcc
+define i32 @t12(i32 %a, i32 %b) nounwind {
+; ARM: t12:
+; ARM: cmp r0, r1
+; ARM: addge r0, r1,
+
+; T2: t12:
+; T2: cmp r0, r1
+; T2: addwge r0, r1, #3000
+  %x = add i32 %b, 3000
+  %cond = icmp slt i32 %a, %b
+  %tmp1 = select i1 %cond, i32 %a, i32 %x
+  ret i32 %tmp1
+}
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index a8237c60e4e0..869b92675def 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,25 +1,25 @@
-; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC
-; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
-; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC
-; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
+; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED
 
 ; rdar://7113725
+; rdar://12091029
 
 define void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
-; GENERIC: t:
-; GENERIC: ldrb [[R2:r[0-9]+]]
-; GENERIC: ldrb [[R3:r[0-9]+]]
-; GENERIC: ldrb [[R12:r[0-9]+]]
-; GENERIC: ldrb [[R1:r[0-9]+]]
-; GENERIC: strb [[R1]]
-; GENERIC: strb [[R12]]
-; GENERIC: strb [[R3]]
-; GENERIC: strb [[R2]]
-
-; DARWIN_V6: t:
-; DARWIN_V6: ldr r1
-; DARWIN_V6: str r1
+; EXPANDED: t:
+; EXPANDED: ldrb [[R2:r[0-9]+]]
+; EXPANDED: ldrb [[R3:r[0-9]+]]
+; EXPANDED: ldrb [[R12:r[0-9]+]]
+; EXPANDED: ldrb [[R1:r[0-9]+]]
+; EXPANDED: strb [[R1]]
+; EXPANDED: strb [[R12]]
+; EXPANDED: strb [[R3]]
+; EXPANDED: strb [[R2]]
+
+; UNALIGNED: t:
+; UNALIGNED: ldr r1
+; UNALIGNED: str r1
 
   %__src1.i = bitcast i8* %b to i32*              ; <i32*> [#uses=1]
   %__dest2.i = bitcast i8* %a to i32*             ; <i32*> [#uses=1]
@@ -27,3 +27,35 @@ entry:
   store i32 %tmp.i, i32* %__dest2.i, align 1
   ret void
 }
+
+define void @hword(double* %a, double* %b) nounwind {
+entry:
+; EXPANDED: hword:
+; EXPANDED-NOT: vld1
+; EXPANDED: ldrh
+; EXPANDED-NOT: str1
+; EXPANDED: strh
+
+; UNALIGNED: hword:
+; UNALIGNED: vld1.16
+; UNALIGNED: vst1.16
+  %tmp = load double* %a, align 2
+  store double %tmp, double* %b, align 2
+  ret void
+}
+
+define void @byte(double* %a, double* %b) nounwind {
+entry:
+; EXPANDED: byte:
+; EXPANDED-NOT: vld1
+; EXPANDED: ldrb
+; EXPANDED-NOT: str1
+; EXPANDED: strb
+
+; UNALIGNED: byte:
+; UNALIGNED: vld1.8
+; UNALIGNED: vst1.8
+  %tmp = load double* %a, align 1
+  store double %tmp, double* %b, align 1
+  ret void
+}
diff --git a/test/CodeGen/Generic/donothing.ll b/test/CodeGen/Generic/donothing.ll
index d6ba138fc6da..3727b60a1a45 100644
--- a/test/CodeGen/Generic/donothing.ll
+++ b/test/CodeGen/Generic/donothing.ll
@@ -7,7 +7,7 @@ declare void @llvm.donothing() readnone
 ; CHECK: f1
 define void @f1() nounwind uwtable ssp {
 entry:
-; CHECK-NOT donothing
+; CHECK-NOT: donothing
   invoke void @llvm.donothing()
   to label %invoke.cont unwind label %lpad
 
@@ -25,7 +25,7 @@ lpad:
 ; CHECK: f2
 define void @f2() nounwind {
 entry:
-; CHECK-NOT donothing
+; CHECK-NOT: donothing
   call void @llvm.donothing()
   ret void
 }
diff --git a/test/CodeGen/Mips/return-vector-float4.ll b/test/CodeGen/Mips/return-vector-float4.ll
new file mode 100644
index 000000000000..ae10f123e4d2
--- /dev/null
+++ b/test/CodeGen/Mips/return-vector-float4.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mipsel -mattr=+android < %s | FileCheck %s
+
+define <4 x float> @retvec4() nounwind readnone {
+entry:
+; CHECK: lwc1 $f0
+; CHECK: lwc1 $f2
+; CHECK: lwc1 $f1
+; CHECK: lwc1 $f3
+
+  ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>
+}
+
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 82857425a9d7..01df37323252 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -95,7 +95,7 @@ bb.nph:
 bb:                                               ; preds = %bb, %bb.nph
 ; CHECK: bb
 ; CHECK: eor.w
-; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
+; CHECK: eorne.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
 ; CHECK-NOT: eor
 ; CHECK: and
   %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index 74729fd4150f..ead198f21624 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -4,9 +4,9 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: t1
 ; CHECK: mvn r0, #-2147483648
 ; CHECK: cmp r2, #10
-; CHECK: add r0, r1
-; CHECK: it  gt
-; CHECK: movgt r0, r1
+; CHECK: it  le
+; CHECK: addle.w r1, r1, r0
+; CHECK: mov r0, r1
         %tmp1 = icmp sgt i32 %c, 10
         %tmp2 = select i1 %tmp1, i32 0, i32 2147483647
         %tmp3 = add i32 %tmp2, %b
@@ -15,10 +15,10 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 
 define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK: t2
-; CHECK: add.w r0, r1, #-2147483648
 ; CHECK: cmp r2, #10
-; CHECK: it  gt
-; CHECK: movgt r0, r1
+; CHECK: it  le
+; CHECK: addle.w r1, r1, #-2147483648
+; CHECK: mov r0, r1
 
         %tmp1 = icmp sgt i32 %c, 10
         %tmp2 = select i1 %tmp1, i32 0, i32 2147483648
@@ -28,10 +28,10 @@ define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
 
 define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
 ; CHECK: t3
-; CHECK: sub.w r0, r1, #10
 ; CHECK: cmp r2, #10
-; CHECK: it  gt
-; CHECK: movgt r0, r1
+; CHECK: it  le
+; CHECK: suble.w r1, r1, #10
+; CHECK: mov r0, r1
         %tmp1 = icmp sgt i32 %c, 10
         %tmp2 = select i1 %tmp1, i32 0, i32 10
         %tmp3 = sub i32 %b, %tmp2
diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll
index 4d5f8d7857c0..a95dcb580702 100644
--- a/test/CodeGen/X86/2011-08-29-InitOrder.ll
+++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -3,7 +3,7 @@
 ; PR5329
 
 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }]
-; CHECK-DEFAULT  .section        .ctors.64535,"aw",@progbits
+; CHECK-DEFAULT: .section        .ctors.64535,"aw",@progbits
 ; CHECK-DEFAULT: .long construct_1
 ; CHECK-DEFAULT: .section        .ctors.63535,"aw",@progbits
 ; CHECK-DEFAULT: .long construct_2
diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
new file mode 100644
index 000000000000..a65e6881540d
--- /dev/null
+++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s | FileCheck %s
+; Check that an overly large immediate created by SROA doesn't crash the
+; legalizer.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct._GtkSheetRow = type { i32*, i32, i32, i32, %struct._GtkSheetButton, i32, i32 }
+%struct._GtkSheetButton = type { i32, i32*, i32, i32*, i32 }
+
+@a = common global %struct._GtkSheetRow* null, align 8
+
+define void @fn1() nounwind uwtable ssp {
+entry:
+  %0 = load %struct._GtkSheetRow** @a, align 8
+  %1 = bitcast %struct._GtkSheetRow* %0 to i576*
+  %srcval2 = load i576* %1, align 8
+  %tobool = icmp ugt i576 %srcval2, 57586096570152913699974892898380567793532123114264532903689671329431521032595044740083720782129802971518987656109067457577065805510327036019308994315074097345724415
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i576 %srcval2, i576* %1, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+
+; CHECK: fn1:
+; CHECK: shrq $32, [[REG:%.*]]
+; CHECK: testq [[REG]], [[REG]]
+; CHECK: je
+}
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 19f38882a6c6..4caa3a039d6a 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -57,6 +57,6 @@ entry:
 ; CHECK: subl $28
 ; CHECK: leal (%esp), %ecx
 ; CHECK: calll _test4fastccsret
-; CHECK addl $28
+; CHECK: addl $28
 }
 declare fastcc void @test4fastccsret(%struct.a* sret)
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 1344cdcd4320..0729dda4a12b 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -10,7 +10,7 @@ define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C,
 ; CHECK: foo
 ; CHECK: addl
 ; CHECK: addl
-; CEHCK: addl
+; CHECK: addl
 
 entry:
 	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 86c6862a53fc..39c7fbafd4c7 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -65,18 +65,18 @@ entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
   ret void
 ; LINUX: test4:
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
-; LINUX movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
+; LINUX: movq
 }
 
 
diff --git a/test/CodeGen/X86/pr11334.ll b/test/CodeGen/X86/pr11334.ll
new file mode 100644
index 000000000000..5b7b5eab87ec
--- /dev/null
+++ b/test/CodeGen/X86/pr11334.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
+
+define <2 x double> @v2f2d_ext_vec(<2 x float> %v1) nounwind {
+entry:
+; CHECK: v2f2d_ext_vec
+; CHECK: cvtps2pd
+; AVX:   v2f2d_ext_vec
+; AVX:   vcvtps2pd
+  %f1 = fpext <2 x float> %v1 to <2 x double>
+  ret <2 x double> %f1
+}
+
+define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
+entry:
+; CHECK: v3f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX:   v3f2d_ext_vec
+; AVX:   vcvtps2pd
+; AVX:   ret
+  %f1 = fpext <3 x float> %v1 to <3 x double>
+  ret <3 x double> %f1
+}
+
+define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind {
+entry:
+; CHECK: v4f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX:   v4f2d_ext_vec
+; AVX:   vcvtps2pd
+; AVX:   ret
+  %f1 = fpext <4 x float> %v1 to <4 x double>
+  ret <4 x double> %f1
+}
+
+define <8 x double> @v8f2d_ext_vec(<8 x float> %v1) nounwind {
+entry:
+; CHECK: v8f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX:   v8f2d_ext_vec
+; AVX:   vcvtps2pd
+; AVX:   vextractf128
+; AVX:   vcvtps2pd
+; AVX:   ret
+  %f1 = fpext <8 x float> %v1 to <8 x double>
+  ret <8 x double> %f1
+}
diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll
deleted file mode 100644
index b066297ff1b4..000000000000
--- a/test/CodeGen/X86/unreachable-stack-protector.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
-
-define void @test5() nounwind optsize noinline ssp {
-entry:
-; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
-  %buf = alloca [64 x i8], align 16
-  %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false)
-  br i1 false, label %if.end, label %if.then
-
-if.then:                                          ; preds = %entry
-  unreachable
-
-if.end:                                           ; preds = %entry
-  ret void
-}
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 294ca8ab7560..d19000187060 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -23,15 +23,14 @@ define i32 @test_load(i32* %a) address_safety {
 ; CHECK:   icmp sge i8 %{{.*}}, %[[LOAD_SHADOW]]
 ; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
 ;
-; The actual load comes next because ASan adds the crash block
-; to the end of the function.
-; CHECK:   %tmp1 = load i32* %a
-; CHECK:   ret i32 %tmp1
-
 ; The crash block reports the error.
 ; CHECK:   call void @__asan_report_load4(i64 %[[LOAD_ADDR]])
 ; CHECK:   unreachable
 ;
+; The actual load.
+; CHECK:   %tmp1 = load i32* %a
+; CHECK:   ret i32 %tmp1
+
 
 
 entry:
@@ -57,15 +56,14 @@ define void @test_store(i32* %a) address_safety {
 ; CHECK:   icmp sge i8 %{{.*}}, %[[STORE_SHADOW]]
 ; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
 ;
-; The actual load comes next because ASan adds the crash block
-; to the end of the function.
-; CHECK:   store i32 42, i32* %a
-; CHECK:   ret void
-;
 ; The crash block reports the error.
 ; CHECK:   call void @__asan_report_store4(i64 %[[STORE_ADDR]])
 ; CHECK:   unreachable
 ;
+; The actual store.
+; CHECK:   store i32 42, i32* %a
+; CHECK:   ret void
+;
 
 entry:
   store i32 42, i32* %a
diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll
new file mode 100644
index 000000000000..32392f3ab0c8
--- /dev/null
+++ b/test/Transforms/GVN/edge.ll
@@ -0,0 +1,60 @@
+; RUN: opt %s -gvn -S -o - | FileCheck %s
+
+define i32 @f1(i32 %x) {
+  ; CHECK: define i32 @f1(
+bb0:
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %bb2, label %bb1
+bb1:
+  br label %bb2
+bb2:
+  %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
+  %foo = add i32 %cond, %x
+  ret i32 %foo
+  ; CHECK: bb2:
+  ; CHECK: ret i32 %x
+}
+
+define i32 @f2(i32 %x) {
+  ; CHECK: define i32 @f2(
+bb0:
+  %cmp = icmp ne i32 %x, 0
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  br label %bb2
+bb2:
+  %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
+  %foo = add i32 %cond, %x
+  ret i32 %foo
+  ; CHECK: bb2:
+  ; CHECK: ret i32 %x
+}
+
+define i32 @f3(i32 %x) {
+  ; CHECK: define i32 @f3(
+bb0:
+  switch i32 %x, label %bb1 [ i32 0, label %bb2]
+bb1:
+  br label %bb2
+bb2:
+  %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ]
+  %foo = add i32 %cond, %x
+  ret i32 %foo
+  ; CHECK: bb2:
+  ; CHECK: ret i32 %x
+}
+
+declare void @g(i1)
+define void @f4(i8 * %x)  {
+; CHECK: define void @f4(
+bb0:
+  %y = icmp eq i8* null, %x
+  br i1 %y, label %bb2, label %bb1
+bb1:
+  br label %bb2
+bb2:
+  %zed = icmp eq i8* null, %x
+  call void @g(i1 %zed)
+; CHECK: call void @g(i1 %y)
+  ret void
+}
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 9e08004ea476..e7641691264c 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -620,7 +620,7 @@ entry:
 ; CHECK-NOT: load
 ; CHECK: load i16*
 ; CHECK-NOT: load
-; CHECK-ret i32
+; CHECK: ret i32
 }
 
 define i32 @test_widening2() nounwind ssp noredzone {
@@ -644,7 +644,7 @@ entry:
 ; CHECK-NOT: load
 ; CHECK: load i32*
 ; CHECK-NOT: load
-; CHECK-ret i32
+; CHECK: ret i32
 }
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll
index e0be41fa6657..c918bc9d5dbb 100644
--- a/test/Transforms/Inline/always-inline.ll
+++ b/test/Transforms/Inline/always-inline.ll
@@ -33,7 +33,6 @@ define void @outer2(i32 %N) {
 ;
 ; CHECK: @outer2
 ; CHECK-NOT: call void @inner2
-; CHECK alloca i32, i32 %N
 ; CHECK-NOT: call void @inner2
 ; CHECK: ret void
 
diff --git a/test/Transforms/InstCombine/memcpy.ll b/test/Transforms/InstCombine/memcpy.ll
index 8a2e3aaad027..3a68ff95af82 100644
--- a/test/Transforms/InstCombine/memcpy.ll
+++ b/test/Transforms/InstCombine/memcpy.ll
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
 define void @test1(i8* %a) {
         tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
@@ -17,3 +18,10 @@ define void @test2(i8* %a) {
 ; CHECK: define void @test2
 ; CHECK-NEXT: call void @llvm.memcpy
 }
+
+define void @test3(i8* %d, i8* %s) {
+        tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i32 4, i1 false)
+        ret void
+; CHECK: define void @test3
+; CHECK-NEXT: call void @llvm.memcpy
+}
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index dbb0ffcd6742..d7e292155cd7 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -238,3 +238,20 @@ xpto:
 return:
   ret i32 42
 }
+
+; CHECK: @PR13621
+define i32 @PR13621(i1 %bool) nounwind {
+entry:
+  %cond = or i1 %bool, true
+  br i1 %cond, label %return, label %xpto
+
+; technically reachable, but this malformed IR may appear as a result of constant propagation
+xpto:
+  %gep = getelementptr i8* %gep, i32 1
+  %o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true)
+; CHECK: ret i32 undef
+  ret i32 %o
+
+return:
+  ret i32 7
+}
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index d9bb3f25bd79..0a7ba5de71bc 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -1272,7 +1272,7 @@ g:
 ; Delete retain,release pairs around loops.
 
 ; CHECK: define void @test39(
-; CHECK_NOT: @objc_
+; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test39(i8* %p) {
 entry:
@@ -1290,7 +1290,7 @@ exit:                                             ; preds = %loop
 ; Delete retain,release pairs around loops containing uses.
 
 ; CHECK: define void @test39b(
-; CHECK_NOT: @objc_
+; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test39b(i8* %p) {
 entry:
@@ -1309,7 +1309,7 @@ exit:                                             ; preds = %loop
 ; Delete retain,release pairs around loops containing potential decrements.
 
 ; CHECK: define void @test39c(
-; CHECK_NOT: @objc_
+; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test39c(i8* %p) {
 entry:
@@ -1329,7 +1329,7 @@ exit:                                             ; preds = %loop
 ; the successors are in a different order.
 
 ; CHECK: define void @test40(
-; CHECK_NOT: @objc_
+; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test40(i8* %p) {
 entry:
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index 76e82a587b8d..1a58e34940e1 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -76,12 +76,12 @@ done:
 ; CHECK: define void @test2() {
 ; CHECK: invoke.cont:
 ; CHECK-NEXT: call i8* @objc_retain
-; CHEK-NOT: @objc
+; CHECK-NOT: @objc_r
 ; CHECK: finally.cont:
 ; CHECK-NEXT: call void @objc_release
-; CHEK-NOT: @objc
+; CHECK-NOT: @objc
 ; CHECK: finally.rethrow:
-; CHEK-NOT: @objc
+; CHECK-NOT: @objc
 ; CHECK: }
 define void @test2() {
 entry:
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index cc207f764da2..00b62feaeb15 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -648,6 +648,49 @@ TEST(APFloatTest, exactInverse) {
   EXPECT_FALSE(APFloat(1.40129846e-45f).getExactInverse(0));
 }
 
+TEST(APFloatTest, roundToIntegral) {
+  APFloat T(-0.5), S(3.14), R(APFloat::getLargest(APFloat::IEEEdouble)), P(0.0);
+
+  P = T;
+  P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_EQ(-0.0, P.convertToDouble());
+  P = T;
+  P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_EQ(-1.0, P.convertToDouble());
+  P = T;
+  P.roundToIntegral(APFloat::rmTowardPositive);
+  EXPECT_EQ(-0.0, P.convertToDouble());
+  P = T;
+  P.roundToIntegral(APFloat::rmNearestTiesToEven);
+  EXPECT_EQ(-0.0, P.convertToDouble());
+
+  P = S;
+  P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_EQ(3.0, P.convertToDouble());
+  P = S;
+  P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_EQ(3.0, P.convertToDouble());
+  P = S;
+  P.roundToIntegral(APFloat::rmTowardPositive);
+  EXPECT_EQ(4.0, P.convertToDouble());
+  P = S;
+  P.roundToIntegral(APFloat::rmNearestTiesToEven);
+  EXPECT_EQ(3.0, P.convertToDouble());
+
+  P = R;
+  P.roundToIntegral(APFloat::rmTowardZero);
+  EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
+  P = R;
+  P.roundToIntegral(APFloat::rmTowardNegative);
+  EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
+  P = R;
+  P.roundToIntegral(APFloat::rmTowardPositive);
+  EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
+  P = R;
+  P.roundToIntegral(APFloat::rmNearestTiesToEven);
+  EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
+}
+
 TEST(APFloatTest, getLargest) {
   EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle).convertToFloat());
   EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble).convertToDouble());
diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp
index c45db2cdc007..6f576681a3e2 100644
--- a/unittests/Support/AlignOfTest.cpp
+++ b/unittests/Support/AlignOfTest.cpp
@@ -178,150 +178,150 @@ TEST(AlignOfTest, BasicAlignedArray) {
   // types because of the bugs mentioned above where GCC and Clang both
   // disregard the arbitrary alignment specifier until the type is used to
   // declare a member of a struct.
-  EXPECT_LE(1u, alignOf<AlignedCharArray<SA1>::union_type>());
-  EXPECT_LE(2u, alignOf<AlignedCharArray<SA2>::union_type>());
-  EXPECT_LE(4u, alignOf<AlignedCharArray<SA4>::union_type>());
-  EXPECT_LE(8u, alignOf<AlignedCharArray<SA8>::union_type>());
+  EXPECT_LE(1u, alignOf<AlignedCharArrayUnion<SA1> >());
+  EXPECT_LE(2u, alignOf<AlignedCharArrayUnion<SA2> >());
+  EXPECT_LE(4u, alignOf<AlignedCharArrayUnion<SA4> >());
+  EXPECT_LE(8u, alignOf<AlignedCharArrayUnion<SA8> >());
 
-  EXPECT_LE(1u, sizeof(AlignedCharArray<SA1>::union_type));
-  EXPECT_LE(2u, sizeof(AlignedCharArray<SA2>::union_type));
-  EXPECT_LE(4u, sizeof(AlignedCharArray<SA4>::union_type));
-  EXPECT_LE(8u, sizeof(AlignedCharArray<SA8>::union_type));
+  EXPECT_LE(1u, sizeof(AlignedCharArrayUnion<SA1>));
+  EXPECT_LE(2u, sizeof(AlignedCharArrayUnion<SA2>));
+  EXPECT_LE(4u, sizeof(AlignedCharArrayUnion<SA4>));
+  EXPECT_LE(8u, sizeof(AlignedCharArrayUnion<SA8>));
 
-  EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1>::union_type>()));
-  EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1, SA2>::union_type>()));
-  EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1, SA2, SA4>::union_type>()));
-  EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1, SA2, SA4, SA8>::union_type>()));
+  EXPECT_EQ(1u, (alignOf<AlignedCharArrayUnion<SA1> >()));
+  EXPECT_EQ(2u, (alignOf<AlignedCharArrayUnion<SA1, SA2> >()));
+  EXPECT_EQ(4u, (alignOf<AlignedCharArrayUnion<SA1, SA2, SA4> >()));
+  EXPECT_EQ(8u, (alignOf<AlignedCharArrayUnion<SA1, SA2, SA4, SA8> >()));
 
-  EXPECT_EQ(1u, sizeof(AlignedCharArray<SA1>::union_type));
-  EXPECT_EQ(2u, sizeof(AlignedCharArray<SA1, SA2>::union_type));
-  EXPECT_EQ(4u, sizeof(AlignedCharArray<SA1, SA2, SA4>::union_type));
-  EXPECT_EQ(8u, sizeof(AlignedCharArray<SA1, SA2, SA4, SA8>::union_type));
+  EXPECT_EQ(1u, sizeof(AlignedCharArrayUnion<SA1>));
+  EXPECT_EQ(2u, sizeof(AlignedCharArrayUnion<SA1, SA2>));
+  EXPECT_EQ(4u, sizeof(AlignedCharArrayUnion<SA1, SA2, SA4>));
+  EXPECT_EQ(8u, sizeof(AlignedCharArrayUnion<SA1, SA2, SA4, SA8>));
 
-  EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1[1]>::union_type>()));
-  EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1[2], SA2[1]>::union_type>()));
-  EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1[42], SA2[55],
-                                          SA4[13]>::union_type>()));
-  EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1[2], SA2[1],
-                                          SA4, SA8>::union_type>()));
+  EXPECT_EQ(1u, (alignOf<AlignedCharArrayUnion<SA1[1]> >()));
+  EXPECT_EQ(2u, (alignOf<AlignedCharArrayUnion<SA1[2], SA2[1]> >()));
+  EXPECT_EQ(4u, (alignOf<AlignedCharArrayUnion<SA1[42], SA2[55],
+                                               SA4[13]> >()));
+  EXPECT_EQ(8u, (alignOf<AlignedCharArrayUnion<SA1[2], SA2[1],
+                                               SA4, SA8> >()));
 
-  EXPECT_EQ(1u,  sizeof(AlignedCharArray<SA1[1]>::union_type));
-  EXPECT_EQ(2u,  sizeof(AlignedCharArray<SA1[2], SA2[1]>::union_type));
-  EXPECT_EQ(4u,  sizeof(AlignedCharArray<SA1[3], SA2[2], SA4>::union_type));
-  EXPECT_EQ(16u, sizeof(AlignedCharArray<SA1, SA2[3],
-                                         SA4[3], SA8>::union_type));
+  EXPECT_EQ(1u,  sizeof(AlignedCharArrayUnion<SA1[1]>));
+  EXPECT_EQ(2u,  sizeof(AlignedCharArrayUnion<SA1[2], SA2[1]>));
+  EXPECT_EQ(4u,  sizeof(AlignedCharArrayUnion<SA1[3], SA2[2], SA4>));
+  EXPECT_EQ(16u, sizeof(AlignedCharArrayUnion<SA1, SA2[3],
+                                              SA4[3], SA8>));
 
   // For other tests we simply assert that the alignment of the union mathes
   // that of the fundamental type and hope that we have any weird type
   // productions that would trigger bugs.
-  EXPECT_EQ(alignOf<char>(), alignOf<AlignedCharArray<char>::union_type>());
-  EXPECT_EQ(alignOf<short>(), alignOf<AlignedCharArray<short>::union_type>());
-  EXPECT_EQ(alignOf<int>(), alignOf<AlignedCharArray<int>::union_type>());
-  EXPECT_EQ(alignOf<long>(), alignOf<AlignedCharArray<long>::union_type>());
+  EXPECT_EQ(alignOf<char>(), alignOf<AlignedCharArrayUnion<char> >());
+  EXPECT_EQ(alignOf<short>(), alignOf<AlignedCharArrayUnion<short> >());
+  EXPECT_EQ(alignOf<int>(), alignOf<AlignedCharArrayUnion<int> >());
+  EXPECT_EQ(alignOf<long>(), alignOf<AlignedCharArrayUnion<long> >());
   EXPECT_EQ(alignOf<long long>(),
-            alignOf<AlignedCharArray<long long>::union_type>());
-  EXPECT_EQ(alignOf<float>(), alignOf<AlignedCharArray<float>::union_type>());
-  EXPECT_EQ(alignOf<double>(), alignOf<AlignedCharArray<double>::union_type>());
+            alignOf<AlignedCharArrayUnion<long long> >());
+  EXPECT_EQ(alignOf<float>(), alignOf<AlignedCharArrayUnion<float> >());
+  EXPECT_EQ(alignOf<double>(), alignOf<AlignedCharArrayUnion<double> >());
   EXPECT_EQ(alignOf<long double>(),
-            alignOf<AlignedCharArray<long double>::union_type>());
-  EXPECT_EQ(alignOf<void *>(), alignOf<AlignedCharArray<void *>::union_type>());
-  EXPECT_EQ(alignOf<int *>(), alignOf<AlignedCharArray<int *>::union_type>());
+            alignOf<AlignedCharArrayUnion<long double> >());
+  EXPECT_EQ(alignOf<void *>(), alignOf<AlignedCharArrayUnion<void *> >());
+  EXPECT_EQ(alignOf<int *>(), alignOf<AlignedCharArrayUnion<int *> >());
   EXPECT_EQ(alignOf<double (*)(double)>(),
-            alignOf<AlignedCharArray<double (*)(double)>::union_type>());
+            alignOf<AlignedCharArrayUnion<double (*)(double)> >());
   EXPECT_EQ(alignOf<double (S6::*)()>(),
-            alignOf<AlignedCharArray<double (S6::*)()>::union_type>());
-  EXPECT_EQ(alignOf<S1>(), alignOf<AlignedCharArray<S1>::union_type>());
-  EXPECT_EQ(alignOf<S2>(), alignOf<AlignedCharArray<S2>::union_type>());
-  EXPECT_EQ(alignOf<S3>(), alignOf<AlignedCharArray<S3>::union_type>());
-  EXPECT_EQ(alignOf<S4>(), alignOf<AlignedCharArray<S4>::union_type>());
-  EXPECT_EQ(alignOf<S5>(), alignOf<AlignedCharArray<S5>::union_type>());
-  EXPECT_EQ(alignOf<S6>(), alignOf<AlignedCharArray<S6>::union_type>());
-  EXPECT_EQ(alignOf<D1>(), alignOf<AlignedCharArray<D1>::union_type>());
-  EXPECT_EQ(alignOf<D2>(), alignOf<AlignedCharArray<D2>::union_type>());
-  EXPECT_EQ(alignOf<D3>(), alignOf<AlignedCharArray<D3>::union_type>());
-  EXPECT_EQ(alignOf<D4>(), alignOf<AlignedCharArray<D4>::union_type>());
-  EXPECT_EQ(alignOf<D5>(), alignOf<AlignedCharArray<D5>::union_type>());
-  EXPECT_EQ(alignOf<D6>(), alignOf<AlignedCharArray<D6>::union_type>());
-  EXPECT_EQ(alignOf<D7>(), alignOf<AlignedCharArray<D7>::union_type>());
-  EXPECT_EQ(alignOf<D8>(), alignOf<AlignedCharArray<D8>::union_type>());
-  EXPECT_EQ(alignOf<D9>(), alignOf<AlignedCharArray<D9>::union_type>());
-  EXPECT_EQ(alignOf<V1>(), alignOf<AlignedCharArray<V1>::union_type>());
-  EXPECT_EQ(alignOf<V2>(), alignOf<AlignedCharArray<V2>::union_type>());
-  EXPECT_EQ(alignOf<V3>(), alignOf<AlignedCharArray<V3>::union_type>());
-  EXPECT_EQ(alignOf<V4>(), alignOf<AlignedCharArray<V4>::union_type>());
-  EXPECT_EQ(alignOf<V5>(), alignOf<AlignedCharArray<V5>::union_type>());
-  EXPECT_EQ(alignOf<V6>(), alignOf<AlignedCharArray<V6>::union_type>());
-  EXPECT_EQ(alignOf<V7>(), alignOf<AlignedCharArray<V7>::union_type>());
+            alignOf<AlignedCharArrayUnion<double (S6::*)()> >());
+  EXPECT_EQ(alignOf<S1>(), alignOf<AlignedCharArrayUnion<S1> >());
+  EXPECT_EQ(alignOf<S2>(), alignOf<AlignedCharArrayUnion<S2> >());
+  EXPECT_EQ(alignOf<S3>(), alignOf<AlignedCharArrayUnion<S3> >());
+  EXPECT_EQ(alignOf<S4>(), alignOf<AlignedCharArrayUnion<S4> >());
+  EXPECT_EQ(alignOf<S5>(), alignOf<AlignedCharArrayUnion<S5> >());
+  EXPECT_EQ(alignOf<S6>(), alignOf<AlignedCharArrayUnion<S6> >());
+  EXPECT_EQ(alignOf<D1>(), alignOf<AlignedCharArrayUnion<D1> >());
+  EXPECT_EQ(alignOf<D2>(), alignOf<AlignedCharArrayUnion<D2> >());
+  EXPECT_EQ(alignOf<D3>(), alignOf<AlignedCharArrayUnion<D3> >());
+  EXPECT_EQ(alignOf<D4>(), alignOf<AlignedCharArrayUnion<D4> >());
+  EXPECT_EQ(alignOf<D5>(), alignOf<AlignedCharArrayUnion<D5> >());
+  EXPECT_EQ(alignOf<D6>(), alignOf<AlignedCharArrayUnion<D6> >());
+  EXPECT_EQ(alignOf<D7>(), alignOf<AlignedCharArrayUnion<D7> >());
+  EXPECT_EQ(alignOf<D8>(), alignOf<AlignedCharArrayUnion<D8> >());
+  EXPECT_EQ(alignOf<D9>(), alignOf<AlignedCharArrayUnion<D9> >());
+  EXPECT_EQ(alignOf<V1>(), alignOf<AlignedCharArrayUnion<V1> >());
+  EXPECT_EQ(alignOf<V2>(), alignOf<AlignedCharArrayUnion<V2> >());
+  EXPECT_EQ(alignOf<V3>(), alignOf<AlignedCharArrayUnion<V3> >());
+  EXPECT_EQ(alignOf<V4>(), alignOf<AlignedCharArrayUnion<V4> >());
+  EXPECT_EQ(alignOf<V5>(), alignOf<AlignedCharArrayUnion<V5> >());
+  EXPECT_EQ(alignOf<V6>(), alignOf<AlignedCharArrayUnion<V6> >());
+  EXPECT_EQ(alignOf<V7>(), alignOf<AlignedCharArrayUnion<V7> >());
 
   // Some versions of MSVC get this wrong somewhat disturbingly. The failure
   // appears to be benign: alignOf<V8>() produces a preposterous value: 12
 #ifndef _MSC_VER
-  EXPECT_EQ(alignOf<V8>(), alignOf<AlignedCharArray<V8>::union_type>());
+  EXPECT_EQ(alignOf<V8>(), alignOf<AlignedCharArrayUnion<V8> >());
 #endif
 
-  EXPECT_EQ(sizeof(char), sizeof(AlignedCharArray<char>::union_type));
-  EXPECT_EQ(sizeof(char[1]), sizeof(AlignedCharArray<char[1]>::union_type));
-  EXPECT_EQ(sizeof(char[2]), sizeof(AlignedCharArray<char[2]>::union_type));
-  EXPECT_EQ(sizeof(char[3]), sizeof(AlignedCharArray<char[3]>::union_type));
-  EXPECT_EQ(sizeof(char[4]), sizeof(AlignedCharArray<char[4]>::union_type));
-  EXPECT_EQ(sizeof(char[5]), sizeof(AlignedCharArray<char[5]>::union_type));
-  EXPECT_EQ(sizeof(char[8]), sizeof(AlignedCharArray<char[8]>::union_type));
-  EXPECT_EQ(sizeof(char[13]), sizeof(AlignedCharArray<char[13]>::union_type));
-  EXPECT_EQ(sizeof(char[16]), sizeof(AlignedCharArray<char[16]>::union_type));
-  EXPECT_EQ(sizeof(char[21]), sizeof(AlignedCharArray<char[21]>::union_type));
-  EXPECT_EQ(sizeof(char[32]), sizeof(AlignedCharArray<char[32]>::union_type));
-  EXPECT_EQ(sizeof(short), sizeof(AlignedCharArray<short>::union_type));
-  EXPECT_EQ(sizeof(int), sizeof(AlignedCharArray<int>::union_type));
-  EXPECT_EQ(sizeof(long), sizeof(AlignedCharArray<long>::union_type));
+  EXPECT_EQ(sizeof(char), sizeof(AlignedCharArrayUnion<char>));
+  EXPECT_EQ(sizeof(char[1]), sizeof(AlignedCharArrayUnion<char[1]>));
+  EXPECT_EQ(sizeof(char[2]), sizeof(AlignedCharArrayUnion<char[2]>));
+  EXPECT_EQ(sizeof(char[3]), sizeof(AlignedCharArrayUnion<char[3]>));
+  EXPECT_EQ(sizeof(char[4]), sizeof(AlignedCharArrayUnion<char[4]>));
+  EXPECT_EQ(sizeof(char[5]), sizeof(AlignedCharArrayUnion<char[5]>));
+  EXPECT_EQ(sizeof(char[8]), sizeof(AlignedCharArrayUnion<char[8]>));
+  EXPECT_EQ(sizeof(char[13]), sizeof(AlignedCharArrayUnion<char[13]>));
+  EXPECT_EQ(sizeof(char[16]), sizeof(AlignedCharArrayUnion<char[16]>));
+  EXPECT_EQ(sizeof(char[21]), sizeof(AlignedCharArrayUnion<char[21]>));
+  EXPECT_EQ(sizeof(char[32]), sizeof(AlignedCharArrayUnion<char[32]>));
+  EXPECT_EQ(sizeof(short), sizeof(AlignedCharArrayUnion<short>));
+  EXPECT_EQ(sizeof(int), sizeof(AlignedCharArrayUnion<int>));
+  EXPECT_EQ(sizeof(long), sizeof(AlignedCharArrayUnion<long>));
   EXPECT_EQ(sizeof(long long),
-            sizeof(AlignedCharArray<long long>::union_type));
-  EXPECT_EQ(sizeof(float), sizeof(AlignedCharArray<float>::union_type));
-  EXPECT_EQ(sizeof(double), sizeof(AlignedCharArray<double>::union_type));
+            sizeof(AlignedCharArrayUnion<long long>));
+  EXPECT_EQ(sizeof(float), sizeof(AlignedCharArrayUnion<float>));
+  EXPECT_EQ(sizeof(double), sizeof(AlignedCharArrayUnion<double>));
   EXPECT_EQ(sizeof(long double),
-            sizeof(AlignedCharArray<long double>::union_type));
-  EXPECT_EQ(sizeof(void *), sizeof(AlignedCharArray<void *>::union_type));
-  EXPECT_EQ(sizeof(int *), sizeof(AlignedCharArray<int *>::union_type));
+            sizeof(AlignedCharArrayUnion<long double>));
+  EXPECT_EQ(sizeof(void *), sizeof(AlignedCharArrayUnion<void *>));
+  EXPECT_EQ(sizeof(int *), sizeof(AlignedCharArrayUnion<int *>));
   EXPECT_EQ(sizeof(double (*)(double)),
-            sizeof(AlignedCharArray<double (*)(double)>::union_type));
+            sizeof(AlignedCharArrayUnion<double (*)(double)>));
   EXPECT_EQ(sizeof(double (S6::*)()),
-            sizeof(AlignedCharArray<double (S6::*)()>::union_type));
-  EXPECT_EQ(sizeof(S1), sizeof(AlignedCharArray<S1>::union_type));
-  EXPECT_EQ(sizeof(S2), sizeof(AlignedCharArray<S2>::union_type));
-  EXPECT_EQ(sizeof(S3), sizeof(AlignedCharArray<S3>::union_type));
-  EXPECT_EQ(sizeof(S4), sizeof(AlignedCharArray<S4>::union_type));
-  EXPECT_EQ(sizeof(S5), sizeof(AlignedCharArray<S5>::union_type));
-  EXPECT_EQ(sizeof(S6), sizeof(AlignedCharArray<S6>::union_type));
-  EXPECT_EQ(sizeof(D1), sizeof(AlignedCharArray<D1>::union_type));
-  EXPECT_EQ(sizeof(D2), sizeof(AlignedCharArray<D2>::union_type));
-  EXPECT_EQ(sizeof(D3), sizeof(AlignedCharArray<D3>::union_type));
-  EXPECT_EQ(sizeof(D4), sizeof(AlignedCharArray<D4>::union_type));
-  EXPECT_EQ(sizeof(D5), sizeof(AlignedCharArray<D5>::union_type));
-  EXPECT_EQ(sizeof(D6), sizeof(AlignedCharArray<D6>::union_type));
-  EXPECT_EQ(sizeof(D7), sizeof(AlignedCharArray<D7>::union_type));
-  EXPECT_EQ(sizeof(D8), sizeof(AlignedCharArray<D8>::union_type));
-  EXPECT_EQ(sizeof(D9), sizeof(AlignedCharArray<D9>::union_type));
-  EXPECT_EQ(sizeof(D9[1]), sizeof(AlignedCharArray<D9[1]>::union_type));
-  EXPECT_EQ(sizeof(D9[2]), sizeof(AlignedCharArray<D9[2]>::union_type));
-  EXPECT_EQ(sizeof(D9[3]), sizeof(AlignedCharArray<D9[3]>::union_type));
-  EXPECT_EQ(sizeof(D9[4]), sizeof(AlignedCharArray<D9[4]>::union_type));
-  EXPECT_EQ(sizeof(D9[5]), sizeof(AlignedCharArray<D9[5]>::union_type));
-  EXPECT_EQ(sizeof(D9[8]), sizeof(AlignedCharArray<D9[8]>::union_type));
-  EXPECT_EQ(sizeof(D9[13]), sizeof(AlignedCharArray<D9[13]>::union_type));
-  EXPECT_EQ(sizeof(D9[16]), sizeof(AlignedCharArray<D9[16]>::union_type));
-  EXPECT_EQ(sizeof(D9[21]), sizeof(AlignedCharArray<D9[21]>::union_type));
-  EXPECT_EQ(sizeof(D9[32]), sizeof(AlignedCharArray<D9[32]>::union_type));
-  EXPECT_EQ(sizeof(V1), sizeof(AlignedCharArray<V1>::union_type));
-  EXPECT_EQ(sizeof(V2), sizeof(AlignedCharArray<V2>::union_type));
-  EXPECT_EQ(sizeof(V3), sizeof(AlignedCharArray<V3>::union_type));
-  EXPECT_EQ(sizeof(V4), sizeof(AlignedCharArray<V4>::union_type));
-  EXPECT_EQ(sizeof(V5), sizeof(AlignedCharArray<V5>::union_type));
-  EXPECT_EQ(sizeof(V6), sizeof(AlignedCharArray<V6>::union_type));
-  EXPECT_EQ(sizeof(V7), sizeof(AlignedCharArray<V7>::union_type));
+            sizeof(AlignedCharArrayUnion<double (S6::*)()>));
+  EXPECT_EQ(sizeof(S1), sizeof(AlignedCharArrayUnion<S1>));
+  EXPECT_EQ(sizeof(S2), sizeof(AlignedCharArrayUnion<S2>));
+  EXPECT_EQ(sizeof(S3), sizeof(AlignedCharArrayUnion<S3>));
+  EXPECT_EQ(sizeof(S4), sizeof(AlignedCharArrayUnion<S4>));
+  EXPECT_EQ(sizeof(S5), sizeof(AlignedCharArrayUnion<S5>));
+  EXPECT_EQ(sizeof(S6), sizeof(AlignedCharArrayUnion<S6>));
+  EXPECT_EQ(sizeof(D1), sizeof(AlignedCharArrayUnion<D1>));
+  EXPECT_EQ(sizeof(D2), sizeof(AlignedCharArrayUnion<D2>));
+  EXPECT_EQ(sizeof(D3), sizeof(AlignedCharArrayUnion<D3>));
+  EXPECT_EQ(sizeof(D4), sizeof(AlignedCharArrayUnion<D4>));
+  EXPECT_EQ(sizeof(D5), sizeof(AlignedCharArrayUnion<D5>));
+  EXPECT_EQ(sizeof(D6), sizeof(AlignedCharArrayUnion<D6>));
+  EXPECT_EQ(sizeof(D7), sizeof(AlignedCharArrayUnion<D7>));
+  EXPECT_EQ(sizeof(D8), sizeof(AlignedCharArrayUnion<D8>));
+  EXPECT_EQ(sizeof(D9), sizeof(AlignedCharArrayUnion<D9>));
+  EXPECT_EQ(sizeof(D9[1]), sizeof(AlignedCharArrayUnion<D9[1]>));
+  EXPECT_EQ(sizeof(D9[2]), sizeof(AlignedCharArrayUnion<D9[2]>));
+  EXPECT_EQ(sizeof(D9[3]), sizeof(AlignedCharArrayUnion<D9[3]>));
+  EXPECT_EQ(sizeof(D9[4]), sizeof(AlignedCharArrayUnion<D9[4]>));
+  EXPECT_EQ(sizeof(D9[5]), sizeof(AlignedCharArrayUnion<D9[5]>));
+  EXPECT_EQ(sizeof(D9[8]), sizeof(AlignedCharArrayUnion<D9[8]>));
+  EXPECT_EQ(sizeof(D9[13]), sizeof(AlignedCharArrayUnion<D9[13]>));
+  EXPECT_EQ(sizeof(D9[16]), sizeof(AlignedCharArrayUnion<D9[16]>));
+  EXPECT_EQ(sizeof(D9[21]), sizeof(AlignedCharArrayUnion<D9[21]>));
+  EXPECT_EQ(sizeof(D9[32]), sizeof(AlignedCharArrayUnion<D9[32]>));
+  EXPECT_EQ(sizeof(V1), sizeof(AlignedCharArrayUnion<V1>));
+  EXPECT_EQ(sizeof(V2), sizeof(AlignedCharArrayUnion<V2>));
+  EXPECT_EQ(sizeof(V3), sizeof(AlignedCharArrayUnion<V3>));
+  EXPECT_EQ(sizeof(V4), sizeof(AlignedCharArrayUnion<V4>));
+  EXPECT_EQ(sizeof(V5), sizeof(AlignedCharArrayUnion<V5>));
+  EXPECT_EQ(sizeof(V6), sizeof(AlignedCharArrayUnion<V6>));
+  EXPECT_EQ(sizeof(V7), sizeof(AlignedCharArrayUnion<V7>));
 
   // Some versions of MSVC also get this wrong. The failure again appears to be
   // benign: sizeof(V8) is only 52 bytes, but our array reserves 56.
 #ifndef _MSC_VER
-  EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArray<V8>::union_type));
+  EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArrayUnion<V8>));
 #endif
 }
 
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index a071a5a8d694..63c9ae059157 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -340,44 +340,51 @@ TEST_F(FileSystemTest, Permissions) {
 }
 #endif
 
-#if !defined(_WIN32) // FIXME: temporary suppressed.
 TEST_F(FileSystemTest, FileMapping) {
   // Create a temp file.
   int FileDescriptor;
   SmallString<64> TempPath;
   ASSERT_NO_ERROR(
     fs::unique_file("%%-%%-%%-%%.temp", FileDescriptor, TempPath));
-
-  // Grow temp file to be 4096 bytes 
-  ASSERT_NO_ERROR(sys::fs::resize_file(Twine(TempPath), 4096));
-  
   // Map in temp file and add some content
-  void* MappedMemory;
-  ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096, 
-                                true /*writable*/, MappedMemory));
-  char* Memory = reinterpret_cast<char*>(MappedMemory);
-  strcpy(Memory, "hello there");
-  
-  // Unmap temp file
-  ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096));
-  MappedMemory = NULL;
-  Memory = NULL;
+  error_code EC;
+  StringRef Val("hello there");
+  {
+    fs::mapped_file_region mfr(FileDescriptor,
+                               fs::mapped_file_region::readwrite,
+                               4096,
+                               0,
+                               EC);
+    ASSERT_NO_ERROR(EC);
+    std::copy(Val.begin(), Val.end(), mfr.data());
+    // Explicitly add a 0.
+    mfr.data()[Val.size()] = 0;
+    // Unmap temp file
+  }
   
   // Map it back in read-only
-  ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096, 
-                                false /*read-only*/, MappedMemory));
+  fs::mapped_file_region mfr(Twine(TempPath),
+                             fs::mapped_file_region::readonly,
+                             0,
+                             0,
+                             EC);
+  ASSERT_NO_ERROR(EC);
   
   // Verify content
-  Memory = reinterpret_cast<char*>(MappedMemory);
-  bool SAME = (strcmp(Memory, "hello there") == 0);
-  EXPECT_TRUE(SAME);
+  EXPECT_EQ(StringRef(mfr.const_data()), Val);
   
   // Unmap temp file
-  ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096));
-  MappedMemory = NULL;
-  Memory = NULL;
-}
-#endif
-
 
+#if LLVM_USE_RVALUE_REFERENCES
+  fs::mapped_file_region m(Twine(TempPath),
+                             fs::mapped_file_region::readonly,
+                             0,
+                             0,
+                             EC);
+  ASSERT_NO_ERROR(EC);
+  const char *Data = m.const_data();
+  fs::mapped_file_region mfrrv(llvm_move(m));
+  EXPECT_EQ(mfrrv.const_data(), Data);
+#endif
+}
 } // anonymous namespace
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 33381e956905..12e153a66514 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -297,6 +297,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) {
   isCompare    = R->getValueAsBit("isCompare");
   isMoveImm    = R->getValueAsBit("isMoveImm");
   isBitcast    = R->getValueAsBit("isBitcast");
+  isSelect     = R->getValueAsBit("isSelect");
   isBarrier    = R->getValueAsBit("isBarrier");
   isCall       = R->getValueAsBit("isCall");
   canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 3ba9f24daaed..95b572d2d08c 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -222,6 +222,7 @@ namespace llvm {
     bool isCompare;
     bool isMoveImm;
     bool isBitcast;
+    bool isSelect;
     bool isBarrier;
     bool isCall;
     bool canFoldAsLoad;
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index ff3ad7283939..011f4b79386f 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -28,19 +28,15 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
-  : TheDef(R),
-    EnumValue(Enum)
-{}
-
-std::string CodeGenSubRegIndex::getNamespace() const {
-  if (TheDef->getValue("Namespace"))
-    return TheDef->getValueAsString("Namespace");
-  else
-    return "";
+  : TheDef(R), EnumValue(Enum) {
+  Name = R->getName();
+  if (R->getValue("Namespace"))
+    Namespace = R->getValueAsString("Namespace");
 }
 
-const std::string &CodeGenSubRegIndex::getName() const {
-  return TheDef->getName();
+CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace,
+                                       unsigned Enum)
+  : TheDef(0), Name(N), Namespace(Nspace), EnumValue(Enum) {
 }
 
 std::string CodeGenSubRegIndex::getQualifiedName() const {
@@ -52,16 +48,31 @@ std::string CodeGenSubRegIndex::getQualifiedName() const {
 }
 
 void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) {
-  std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
-  if (Comps.empty())
+  if (!TheDef)
     return;
-  if (Comps.size() != 2)
-    throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
-  CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
-  CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
-  CodeGenSubRegIndex *X = A->addComposite(B, this);
-  if (X)
-    throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
+
+  std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
+  if (!Comps.empty()) {
+    if (Comps.size() != 2)
+      throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
+    CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
+    CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
+    CodeGenSubRegIndex *X = A->addComposite(B, this);
+    if (X)
+      throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
+  }
+
+  std::vector<Record*> Parts =
+    TheDef->getValueAsListOfDefs("CoveringSubRegIndices");
+  if (!Parts.empty()) {  // An empty list is skipped entirely.
+    if (Parts.size() < 2)  // A single-entry list is rejected as an error.
+      throw TGError(TheDef->getLoc(),
+                    "CoveringSubRegIndices must have two or more entries");
+    SmallVector<CodeGenSubRegIndex*, 8> IdxParts;
+    for (unsigned i = 0, e = Parts.size(); i != e; ++i)
+      IdxParts.push_back(RegBank.getSubRegIdx(Parts[i]));
+    RegBank.addConcatSubRegIndex(IdxParts, this);  // Parts -> this index.
+  }
 }
 
 void CodeGenSubRegIndex::cleanComposites() {
@@ -937,7 +948,7 @@ void CodeGenRegisterClass::buildRegUnitSet(
 //                               CodeGenRegBank
 //===----------------------------------------------------------------------===//
 
-CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
+CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) {
   // Configure register Sets to understand register classes and tuples.
   Sets.addFieldExpander("RegisterClass", "MemberList");
   Sets.addFieldExpander("CalleeSavedRegs", "SaveList");
@@ -947,7 +958,6 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
   // More indices will be synthesized later.
   std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
   std::sort(SRIs.begin(), SRIs.end(), LessRecord());
-  NumNamedIndices = SRIs.size();
   for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
     getSubRegIdx(SRIs[i]);
   // Build composite maps from ComposedOf fields.
@@ -1015,6 +1025,15 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
   CodeGenRegisterClass::computeSubClasses(*this);
 }
 
+// Create a synthetic CodeGenSubRegIndex without a corresponding Record.
+CodeGenSubRegIndex*
+CodeGenRegBank::createSubRegIndex(StringRef Name, StringRef Namespace) {
+  CodeGenSubRegIndex *Idx = new CodeGenSubRegIndex(Name, Namespace,
+                                                   SubRegIndices.size() + 1);
+  SubRegIndices.push_back(Idx);
+  return Idx;
+}
+
 CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) {
   CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def];
   if (Idx)
@@ -1079,7 +1098,7 @@ CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A,
 
   // None exists, synthesize one.
   std::string Name = A->getName() + "_then_" + B->getName();
-  Comp = getSubRegIdx(new Record(Name, SMLoc(), Records));
+  Comp = createSubRegIndex(Name, A->getNamespace());
   A->addComposite(B, Comp);
   return Comp;
 }
@@ -1099,7 +1118,7 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex*, 8> &Parts) {
     Name += '_';
     Name += Parts[i]->getName();
   }
-  return Idx = getSubRegIdx(new Record(Name, SMLoc(), Records));
+  return Idx = createSubRegIndex(Name, Parts.front()->getNamespace());
 }
 
 void CodeGenRegBank::computeComposites() {
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index eb6724ea1ac5..827063e47017 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -35,14 +35,17 @@ namespace llvm {
   /// CodeGenSubRegIndex - Represents a sub-register index.
   class CodeGenSubRegIndex {
     Record *const TheDef;
+    std::string Name;
+    std::string Namespace;
 
   public:
     const unsigned EnumValue;
 
     CodeGenSubRegIndex(Record *R, unsigned Enum);
+    CodeGenSubRegIndex(StringRef N, StringRef Nspace, unsigned Enum);
 
-    const std::string &getName() const;
-    std::string getNamespace() const;
+    const std::string &getName() const { return Name; }
+    const std::string &getNamespace() const { return Namespace; }
     std::string getQualifiedName() const;
 
     // Order CodeGenSubRegIndex pointers by EnumValue.
@@ -422,13 +425,13 @@ namespace llvm {
   // CodeGenRegBank - Represent a target's registers and the relations between
   // them.
   class CodeGenRegBank {
-    RecordKeeper &Records;
     SetTheory Sets;
 
     // SubRegIndices.
     std::vector<CodeGenSubRegIndex*> SubRegIndices;
     DenseMap<Record*, CodeGenSubRegIndex*> Def2SubRegIdx;
-    unsigned NumNamedIndices;
+
+    CodeGenSubRegIndex *createSubRegIndex(StringRef Name, StringRef NameSpace);
 
     typedef std::map<SmallVector<CodeGenSubRegIndex*, 8>,
                      CodeGenSubRegIndex*> ConcatIdxMap;
@@ -495,7 +498,6 @@ namespace llvm {
     // in the .td files. The rest are synthesized such that all sub-registers
     // have a unique name.
     ArrayRef<CodeGenSubRegIndex*> getSubRegIndices() { return SubRegIndices; }
-    unsigned getNumNamedIndices() { return NumNamedIndices; }
 
     // Find a SubRegIndex form its Record def.
     CodeGenSubRegIndex *getSubRegIdx(Record*);
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 2cdde5500954..e89c393b6a9b 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -17,9 +17,15 @@
 #include "CodeGenTarget.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
@@ -35,9 +41,7 @@ struct EncodingField {
   EncodingField(unsigned B, unsigned W, unsigned O)
     : Base(B), Width(W), Offset(O) { }
 };
-} // End anonymous namespace
 
-namespace {
 struct OperandInfo {
   std::vector<EncodingField> Fields;
   std::string Decoder;
@@ -56,10 +60,25 @@ struct OperandInfo {
   const_iterator begin() const { return Fields.begin(); }
   const_iterator end() const   { return Fields.end();   }
 };
+
+typedef std::vector<uint8_t> DecoderTable;
+typedef uint32_t DecoderFixup;
+typedef std::vector<DecoderFixup> FixupList;
+typedef std::vector<FixupList> FixupScopeList;
+typedef SetVector<std::string> PredicateSet;
+typedef SetVector<std::string> DecoderSet;
+struct DecoderTableInfo {
+  DecoderTable Table;
+  FixupScopeList FixupStack;
+  PredicateSet Predicates;
+  DecoderSet Decoders;
+};
+
 } // End anonymous namespace
 
 namespace {
 class FixedLenDecoderEmitter {
+  const std::vector<const CodeGenInstruction*> *NumberedInstructions;
 public:
 
   // Defaults preserved here for documentation, even though they aren't
@@ -77,6 +96,17 @@ public:
     GuardPrefix(GPrefix), GuardPostfix(GPostfix),
     ReturnOK(ROK), ReturnFail(RFail), Locals(L) {}
 
+  // Emit the decoder state machine table.
+  void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
+                 unsigned Indentation, unsigned BitWidth,
+                 StringRef Namespace) const;
+  void emitPredicateFunction(formatted_raw_ostream &OS,
+                             PredicateSet &Predicates,
+                             unsigned Indentation) const;
+  void emitDecoderFunction(formatted_raw_ostream &OS,
+                           DecoderSet &Decoders,
+                           unsigned Indentation) const;
+
   // run - Output the code emitter
   void run(raw_ostream &o);
 
@@ -120,9 +150,7 @@ static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
 }
 // Prints the bit value for each position.
 static void dumpBits(raw_ostream &o, const BitsInit &bits) {
-  unsigned index;
-
-  for (index = bits.getNumBits(); index > 0; index--) {
+  for (unsigned index = bits.getNumBits(); index > 0; --index) {
     switch (bitFromBits(bits, index - 1)) {
     case BIT_TRUE:
       o << "1";
@@ -238,8 +266,9 @@ public:
   // match the remaining undecoded encoding bits against the singleton.
   void recurse();
 
-  // Emit code to decode instructions given a segment or segments of bits.
-  void emit(raw_ostream &o, unsigned &Indentation) const;
+  // Emit table entries to decode instructions given a segment or segments of
+  // bits.
+  void emitTableEntry(DecoderTableInfo &TableInfo) const;
 
   // Returns the number of fanout produced by the filter.  More fanout implies
   // the filter distinguishes more categories of instructions.
@@ -338,12 +367,7 @@ public:
     doFilter();
   }
 
-  // The top level filter chooser has NULL as its parent.
-  bool isTopLevel() const { return Parent == NULL; }
-
-  // Emit the top level typedef and decodeInstruction() function.
-  void emitTop(raw_ostream &o, unsigned Indentation,
-               const std::string &Namespace) const;
+  unsigned getBitWidth() const { return BitWidth; }
 
 protected:
   // Populates the insn given the uid.
@@ -414,21 +438,28 @@ protected:
   bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
                           unsigned Opc) const;
 
-  void emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
-                         unsigned Opc) const;
+  bool doesOpcodeNeedPredicate(unsigned Opc) const;
+  unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
+  void emitPredicateTableEntry(DecoderTableInfo &TableInfo,
+                               unsigned Opc) const;
+
+  void emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
+                              unsigned Opc) const;
 
-  // Emits code to decode the singleton.  Return true if we have matched all the
-  // well-known bits.
-  bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                            unsigned Opc) const;
+  // Emits table entries to decode the singleton.
+  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                               unsigned Opc) const;
 
   // Emits code to decode the singleton, and then to decode the rest.
-  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                            const Filter &Best) const;
+  void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                               const Filter &Best) const;
 
-  void emitBinaryParser(raw_ostream &o , unsigned &Indentation,
+  void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
                         const OperandInfo &OpInfo) const;
 
+  void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc) const;
+  unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc) const;
+
   // Assign a single filter and run with it.
   void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
 
@@ -447,10 +478,10 @@ protected:
   // dump the conflict set to the standard error.
   void doFilter();
 
-  // Emits code to decode our share of instructions.  Returns true if the
-  // emitted code causes a return, which occurs if we know how to decode
-  // the instruction at this level or the instruction is not decodeable.
-  bool emit(raw_ostream &o, unsigned &Indentation) const;
+public:
+  // emitTableEntries - Emit state machine entries to decode our share of
+  // instructions.
+  void emitTableEntries(DecoderTableInfo &TableInfo) const;
 };
 } // End anonymous namespace
 
@@ -524,11 +555,9 @@ void Filter::recurse() {
   // Starts by inheriting our parent filter chooser's filter bit values.
   std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues);
 
-  unsigned bitIndex;
-
   if (VariableInstructions.size()) {
     // Conservatively marks each segment position as BIT_UNSET.
-    for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
+    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex)
       BitValueArray[StartBit + bitIndex] = BIT_UNSET;
 
     // Delegates to an inferior filter chooser for further processing on this
@@ -544,7 +573,7 @@ void Filter::recurse() {
   }
 
   // No need to recurse for a singleton filtered instruction.
-  // See also Filter::emit().
+  // See also Filter::emit*().
   if (getNumFiltered() == 1) {
     //Owner->SingletonExists(LastOpcFiltered);
     assert(FilterChooserMap.size() == 1);
@@ -557,7 +586,7 @@ void Filter::recurse() {
        mapIterator++) {
 
     // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
-    for (bitIndex = 0; bitIndex < NumBits; bitIndex++) {
+    for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) {
       if (mapIterator->first & (1ULL << bitIndex))
         BitValueArray[StartBit + bitIndex] = BIT_TRUE;
       else
@@ -577,64 +606,100 @@ void Filter::recurse() {
   }
 }
 
-// Emit code to decode instructions given a segment or segments of bits.
-void Filter::emit(raw_ostream &o, unsigned &Indentation) const {
-  o.indent(Indentation) << "// Check Inst{";
-
-  if (NumBits > 1)
-    o << (StartBit + NumBits - 1) << '-';
+static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
+                               uint32_t DestIdx) {
+  // Any NumToSkip fixups in the current scope can resolve to the
+  // current location.
+  for (FixupList::const_reverse_iterator I = Fixups.rbegin(),
+                                         E = Fixups.rend();
+       I != E; ++I) {
+    // Calculate the distance from the fixup entry to the destination. The
+    // skip target is measured from the byte after the 16-bit NumToSkip
+    // entry itself, so subtract two from the displacement here to account
+    // for that.
+    uint32_t FixupIdx = *I;
+    uint32_t Delta = DestIdx - FixupIdx - 2;
+    // Our NumToSkip entries are 16-bits. Make sure our table isn't too
+    // big.
+    assert(Delta < 65536U && "disassembler decoding table too large!");
+    Table[FixupIdx] = (uint8_t)Delta;
+    Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
+  }
+}
 
-  o << StartBit << "} ...\n";
+// Emit table entries to decode instructions given a segment or segments
+// of bits.
+void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
+  TableInfo.Table.push_back(MCD::OPC_ExtractField);
+  TableInfo.Table.push_back(StartBit);
+  TableInfo.Table.push_back(NumBits);
 
-  o.indent(Indentation) << "switch (fieldFromInstruction" << Owner->BitWidth
-                        << "(insn, " << StartBit << ", "
-                        << NumBits << ")) {\n";
+  // A new filter entry begins a new scope for fixup resolution.
+  TableInfo.FixupStack.push_back(FixupList());
 
   std::map<unsigned, const FilterChooser*>::const_iterator filterIterator;
 
-  bool DefaultCase = false;
+  DecoderTable &Table = TableInfo.Table;
+
+  size_t PrevFilter = 0;
+  bool HasFallthrough = false;
   for (filterIterator = FilterChooserMap.begin();
        filterIterator != FilterChooserMap.end();
        filterIterator++) {
-
     // Field value -1 implies a non-empty set of variable instructions.
     // See also recurse().
     if (filterIterator->first == (unsigned)-1) {
-      DefaultCase = true;
-
-      o.indent(Indentation) << "default:\n";
-      o.indent(Indentation) << "  break; // fallthrough\n";
-
-      // Closing curly brace for the switch statement.
-      // This is unconventional because we want the default processing to be
-      // performed for the fallthrough cases as well, i.e., when the "cases"
-      // did not prove a decoded instruction.
-      o.indent(Indentation) << "}\n";
-
-    } else
-      o.indent(Indentation) << "case " << filterIterator->first << ":\n";
+      HasFallthrough = true;
+
+      // Each scope should always have at least one filter value to check
+      // for.
+      assert(PrevFilter != 0 && "empty filter set!");
+      FixupList &CurScope = TableInfo.FixupStack.back();
+      // Resolve any NumToSkip fixups in the current scope.
+      resolveTableFixups(Table, CurScope, Table.size());
+      CurScope.clear();
+      PrevFilter = 0;  // Don't re-process the filter's fallthrough.
+    } else {
+      Table.push_back(MCD::OPC_FilterValue);
+      // Encode and emit the value to filter against.
+      uint8_t Buffer[8];
+      unsigned Len = encodeULEB128(filterIterator->first, Buffer);
+      Table.insert(Table.end(), Buffer, Buffer + Len);
+      // Reserve space for the NumToSkip entry. We'll backpatch the value
+      // later.
+      PrevFilter = Table.size();
+      Table.push_back(0);
+      Table.push_back(0);
+    }
 
     // We arrive at a category of instructions with the same segment value.
     // Now delegate to the sub filter chooser for further decodings.
     // The case may fallthrough, which happens if the remaining well-known
     // encoding bits do not match exactly.
-    if (!DefaultCase) { ++Indentation; ++Indentation; }
-
-    filterIterator->second->emit(o, Indentation);
-    // For top level default case, there's no need for a break statement.
-    if (Owner->isTopLevel() && DefaultCase)
-      break;
-    
-    o.indent(Indentation) << "break;\n";
-
-    if (!DefaultCase) { --Indentation; --Indentation; }
+    filterIterator->second->emitTableEntries(TableInfo);
+
+    // Now that we've emitted the body of the handler, update the NumToSkip
+    // of the filter itself to be able to skip forward when false. Subtract
+    // two to account for the width of the NumToSkip field itself.
+    if (PrevFilter) {
+      uint32_t NumToSkip = Table.size() - PrevFilter - 2;
+      assert(NumToSkip < 65536U && "disassembler decoding table too large!");
+      Table[PrevFilter] = (uint8_t)NumToSkip;
+      Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8);
+    }
   }
 
-  // If there is no default case, we still need to supply a closing brace.
-  if (!DefaultCase) {
-    // Closing curly brace for the switch statement.
-    o.indent(Indentation) << "}\n";
-  }
+  // Any remaining unresolved fixups bubble up to the parent fixup scope.
+  assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!");
+  FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1;
+  FixupScopeList::iterator Dest = Source - 1;
+  Dest->insert(Dest->end(), Source->begin(), Source->end());
+  TableInfo.FixupStack.pop_back();
+
+  // If there is no fallthrough, then the final filter should get fixed
+  // up according to the enclosing scope rather than the current position.
+  if (!HasFallthrough)
+    TableInfo.FixupStack.back().push_back(PrevFilter);
 }
 
 // Returns the number of fanout produced by the filter.  More fanout implies
@@ -652,31 +717,205 @@ unsigned Filter::usefulness() const {
 //                              //
 //////////////////////////////////
 
-// Emit the top level typedef and decodeInstruction() function.
-void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation,
-                            const std::string &Namespace) const {
-  o.indent(Indentation) <<
-    "static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction"
-    << BitWidth << "(MCInst &MI, uint" << BitWidth
-    << "_t insn, uint64_t Address, "
-    << "const void *Decoder, const MCSubtargetInfo &STI) {\n";
-  o.indent(Indentation) << "  unsigned tmp = 0;\n";
-  o.indent(Indentation) << "  (void)tmp;\n";
-  o.indent(Indentation) << Emitter->Locals << "\n";
-  o.indent(Indentation) << "  uint64_t Bits = STI.getFeatureBits();\n";
-  o.indent(Indentation) << "  (void)Bits;\n";
-
-  ++Indentation; ++Indentation;
-  // Emits code to decode the instructions.
-  emit(o, Indentation);
-
-  o << '\n';
-  o.indent(Indentation) << "return " << Emitter->ReturnFail << ";\n";
-  --Indentation; --Indentation;
-
-  o.indent(Indentation) << "}\n";
-
-  o << '\n';
+// Emit the decoder state machine table.
+void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS,
+                                       DecoderTable &Table,
+                                       unsigned Indentation,
+                                       unsigned BitWidth,
+                                       StringRef Namespace) const {
+  OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace
+    << BitWidth << "[] = {\n";
+
+  Indentation += 2;
+
+  // FIXME: We may be able to use the NumToSkip values to recover
+  // appropriate indentation levels.
+  DecoderTable::const_iterator I = Table.begin();
+  DecoderTable::const_iterator E = Table.end();
+  while (I != E) {
+    assert (I < E && "incomplete decode table entry!");
+
+    uint64_t Pos = I - Table.begin();
+    OS << "/* " << Pos << " */";
+    OS.PadToColumn(12);
+
+    switch (*I) {
+    default:
+      throw "invalid decode table opcode";
+    case MCD::OPC_ExtractField: {
+      ++I;
+      unsigned Start = *I++;
+      unsigned Len = *I++;
+      OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", "
+        << Len << ",  // Inst{";
+      if (Len > 1)
+        OS << (Start + Len - 1) << "-";
+      OS << Start << "} ...\n";
+      break;
+    }
+    case MCD::OPC_FilterValue: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_FilterValue, ";
+      // The filter value is ULEB128 encoded.
+      while (*I >= 128)
+        OS << utostr(*I++) << ", ";
+      OS << utostr(*I++) << ", ";
+
+      // 16-bit numtoskip value.
+      uint8_t Byte = *I++;
+      uint32_t NumToSkip = Byte;
+      OS << utostr(Byte) << ", ";
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 8;
+      OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
+      break;
+    }
+    case MCD::OPC_CheckField: {
+      ++I;
+      unsigned Start = *I++;
+      unsigned Len = *I++;
+      OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", "
+        << Len << ", ";// << Val << ", " << NumToSkip << ",\n";
+      // ULEB128 encoded field value.
+      for (; *I >= 128; ++I)
+        OS << utostr(*I) << ", ";
+      OS << utostr(*I++) << ", ";
+      // 16-bit numtoskip value.
+      uint8_t Byte = *I++;
+      uint32_t NumToSkip = Byte;
+      OS << utostr(Byte) << ", ";
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 8;
+      OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
+      break;
+    }
+    case MCD::OPC_CheckPredicate: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_CheckPredicate, ";
+      for (; *I >= 128; ++I)
+        OS << utostr(*I) << ", ";
+      OS << utostr(*I++) << ", ";
+
+      // 16-bit numtoskip value.
+      uint8_t Byte = *I++;
+      uint32_t NumToSkip = Byte;
+      OS << utostr(Byte) << ", ";
+      Byte = *I++;
+      OS << utostr(Byte) << ", ";
+      NumToSkip |= Byte << 8;
+      OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
+      break;
+    }
+    case MCD::OPC_Decode: {
+      ++I;
+      // Extract the ULEB128 encoded Opcode to a buffer.
+      uint8_t Buffer[8], *p = Buffer;
+      while ((*p++ = *I++) >= 128)
+        assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer)
+               && "ULEB128 value too large!");
+      // Decode the Opcode value.
+      unsigned Opc = decodeULEB128(Buffer);
+      OS.indent(Indentation) << "MCD::OPC_Decode, ";
+      for (p = Buffer; *p >= 128; ++p)
+        OS << utostr(*p) << ", ";
+      OS << utostr(*p) << ", ";
+
+      // Decoder index.
+      for (; *I >= 128; ++I)
+        OS << utostr(*I) << ", ";
+      OS << utostr(*I++) << ", ";
+
+      OS << "// Opcode: "
+         << NumberedInstructions->at(Opc)->TheDef->getName() << "\n";
+      break;
+    }
+    case MCD::OPC_SoftFail: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_SoftFail";
+      // Positive mask: echo its ULEB128 bytes, decoding the value as we go.
+      uint64_t Value = 0;
+      unsigned Shift = 0;
+      do {
+        OS << ", " << utostr(*I);
+        Value += (uint64_t)(*I & 0x7f) << Shift;  // Widen before shifting.
+        Shift += 7;
+      } while (*I++ >= 128);
+      if (Value > 127)  // Multi-byte values also get a hex annotation.
+        OS << " /* 0x" << utohexstr(Value) << " */";
+      // Negative mask: same encoding and handling as the positive mask.
+      Value = 0;
+      Shift = 0;
+      do {
+        OS << ", " << utostr(*I);
+        Value += (uint64_t)(*I & 0x7f) << Shift;  // Widen before shifting.
+        Shift += 7;
+      } while (*I++ >= 128);
+      if (Value > 127)  // Multi-byte values also get a hex annotation.
+        OS << " /* 0x" << utohexstr(Value) << " */";
+      OS << ",\n";
+      break;
+    }
+    case MCD::OPC_Fail: {
+      ++I;
+      OS.indent(Indentation) << "MCD::OPC_Fail,\n";
+      break;
+    }
+    }
+  }
+  OS.indent(Indentation) << "0\n";
+
+  Indentation -= 2;
+
+  OS.indent(Indentation) << "};\n\n";
+}
+
+void FixedLenDecoderEmitter::
+emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates,
+                      unsigned Indentation) const {
+  // The predicate function is just a big switch statement based on the
+  // input predicate index.
+  OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, "
+    << "uint64_t Bits) {\n";
+  Indentation += 2;
+  OS.indent(Indentation) << "switch (Idx) {\n";
+  OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
+  unsigned Index = 0;
+  for (PredicateSet::const_iterator I = Predicates.begin(), E = Predicates.end();
+       I != E; ++I, ++Index) {
+    OS.indent(Indentation) << "case " << Index << ":\n";
+    OS.indent(Indentation+2) << "return (" << *I << ");\n";
+  }
+  OS.indent(Indentation) << "}\n";
+  Indentation -= 2;
+  OS.indent(Indentation) << "}\n\n";
+}
+
+void FixedLenDecoderEmitter::
+emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
+                    unsigned Indentation) const {
+  // The decoder function is just a big switch statement based on the
+  // input decoder index.
+  OS.indent(Indentation) << "template<typename InsnType>\n";
+  OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
+    << " unsigned Idx, InsnType insn, MCInst &MI,\n";
+  OS.indent(Indentation) << "                                   uint64_t "
+    << "Address, const void *Decoder) {\n";
+  Indentation += 2;
+  OS.indent(Indentation) << "InsnType tmp;\n";
+  OS.indent(Indentation) << "switch (Idx) {\n";
+  OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
+  unsigned Index = 0;
+  for (DecoderSet::const_iterator I = Decoders.begin(), E = Decoders.end();
+       I != E; ++I, ++Index) {
+    OS.indent(Indentation) << "case " << Index << ":\n";
+    OS << *I;
+    OS.indent(Indentation+2) << "return S;\n";
+  }
+  OS.indent(Indentation) << "}\n";
+  Indentation -= 2;
+  OS.indent(Indentation) << "}\n\n";
 }
 
 // Populates the field of the insn given the start position and the number of
@@ -703,9 +942,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
 /// filter array as a series of chars.
 void FilterChooser::dumpFilterArray(raw_ostream &o,
                                  const std::vector<bit_value_t> &filter) const {
-  unsigned bitIndex;
-
-  for (bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
+  for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
     switch (filter[bitIndex - 1]) {
     case BIT_UNFILTERED:
       o << ".";
@@ -827,26 +1064,71 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
 
   if (OpInfo.numFields() == 1) {
     OperandInfo::const_iterator OI = OpInfo.begin();
-    o.indent(Indentation) << "  tmp = fieldFromInstruction" << BitWidth
-                            << "(insn, " << OI->Base << ", " << OI->Width
-                            << ");\n";
+    o.indent(Indentation) << "tmp = fieldFromInstruction"
+                          << "(insn, " << OI->Base << ", " << OI->Width
+                          << ");\n";
   } else {
-    o.indent(Indentation) << "  tmp = 0;\n";
+    o.indent(Indentation) << "tmp = 0;\n";
     for (OperandInfo::const_iterator OI = OpInfo.begin(), OE = OpInfo.end();
          OI != OE; ++OI) {
-      o.indent(Indentation) << "  tmp |= (fieldFromInstruction" << BitWidth
+      o.indent(Indentation) << "tmp |= (fieldFromInstruction"
                             << "(insn, " << OI->Base << ", " << OI->Width
                             << ") << " << OI->Offset << ");\n";
     }
   }
 
   if (Decoder != "")
-    o.indent(Indentation) << "  " << Emitter->GuardPrefix << Decoder
+    o.indent(Indentation) << Emitter->GuardPrefix << Decoder
                           << "(MI, tmp, Address, Decoder)"
                           << Emitter->GuardPostfix << "\n";
   else
-    o.indent(Indentation) << "  MI.addOperand(MCOperand::CreateImm(tmp));\n";
+    o.indent(Indentation) << "MI.addOperand(MCOperand::CreateImm(tmp));\n";
+
+}
+
+void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
+                                unsigned Opc) const {
+  std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
+    Operands.find(Opc); // NOTE(review): result is not checked against end().
+  const std::vector<OperandInfo>& InsnOperands = OpIter->second;
+  for (std::vector<OperandInfo>::const_iterator
+       I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
+    // A field-less operand carrying a Decoder is a custom instruction decoder.
+    if (I->numFields() == 0 && I->Decoder.size()) {
+      OS.indent(Indentation) << Emitter->GuardPrefix << I->Decoder
+        << "(MI, insn, Address, Decoder)"
+        << Emitter->GuardPostfix << "\n";
+      break; // It is handed the whole insn; later operands are not parsed.
+    }
 
+    emitBinaryParser(OS, Indentation, *I);
+  }
+}
+
+unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
+                                        unsigned Opc) const {
+  // Build up the decoder string.
+  SmallString<256> Decoder;
+  // FIXME: emitDecoder() function can take a buffer directly rather than
+  // a stream.
+  raw_svector_ostream S(Decoder);
+  unsigned I = 4; // Indentation passed to emitDecoder() for the emitted text.
+  emitDecoder(S, I, Opc);
+  S.flush();
+
+  // Using the full decoder string as the key value here is a bit
+  // heavyweight, but is effective. If the string comparisons become a
+  // performance concern, we can implement a mangling of the decoder
+  // data easily enough with a map back to the actual string. That's
+  // overkill for now, though.
+
+  // Make sure the decoder is in the table.
+  Decoders.insert(Decoder.str());
+  // Now figure out the index for when we write out the table.
+  DecoderSet::const_iterator P = std::find(Decoders.begin(),
+                                           Decoders.end(),
+                                           Decoder.str());
+  return (unsigned)(P - Decoders.begin());
 }
 
 static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
@@ -887,8 +1169,74 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
   return Predicates->getSize() > 0;
 }
 
-void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
-                                      unsigned Opc) const {
+bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
+  ListInit *Predicates =
+    AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+  for (unsigned i = 0; i < Predicates->getSize(); ++i) {
+    Record *Pred = Predicates->getElementAsRecord(i);
+    if (!Pred->getValue("AssemblerMatcherPredicate"))
+      continue; // Skip records without an AssemblerMatcherPredicate value.
+
+    std::string P = Pred->getValueAsString("AssemblerCondString");
+
+    if (!P.length())
+      continue; // Skip empty condition strings.
+
+    return true; // Found a non-empty AssemblerCondString.
+  }
+  return false; // No predicate in the list has a usable condition string.
+}
+
+unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
+                                          StringRef Predicate) const {
+  // Using the full predicate string as the key value here is a bit
+  // heavyweight, but is effective. If the string comparisons become a
+  // performance concern, we can implement a mangling of the predicate
+  // data easily enough with a map back to the actual string. That's
+  // overkill for now, though.
+
+  // Make sure the predicate is in the table.
+  TableInfo.Predicates.insert(Predicate.str());
+  // Now figure out the index for when we write out the table.
+  PredicateSet::const_iterator P = std::find(TableInfo.Predicates.begin(),
+                                             TableInfo.Predicates.end(),
+                                             Predicate.str());
+  return (unsigned)(P - TableInfo.Predicates.begin());
+}
+
+void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
+                                            unsigned Opc) const {
+  if (!doesOpcodeNeedPredicate(Opc))
+    return; // Nothing is added to the table for this opcode.
+
+  // Build up the predicate string.
+  SmallString<256> Predicate;
+  // FIXME: emitPredicateMatch() functions can take a buffer directly rather
+  // than a stream.
+  raw_svector_ostream PS(Predicate);
+  unsigned I = 0; // Indentation argument for emitPredicateMatch().
+  emitPredicateMatch(PS, I, Opc);
+
+  // Figure out the index into the predicate table for the predicate just
+  // computed.
+  unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
+  SmallString<16> PBytes;
+  raw_svector_ostream S(PBytes);
+  encodeULEB128(PIdx, S);
+  S.flush();
+
+  TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
+  // Predicate index, ULEB128-encoded.
+  for (unsigned i = 0, e = PBytes.size(); i != e; ++i)
+    TableInfo.Table.push_back(PBytes[i]);
+  // Push location for NumToSkip backpatching.
+  TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
+  TableInfo.Table.push_back(0); // Two placeholder bytes reserved for the
+  TableInfo.Table.push_back(0); // NumToSkip value recorded just above.
+}
+
+void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
+                                           unsigned Opc) const {
   BitsInit *SFBits =
     AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail");
   if (!SFBits) return;
@@ -914,13 +1262,11 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
     default:
       // The bit is not set; this must be an error!
       StringRef Name = AllInstructions[Opc]->TheDef->getName();
-      errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
-             << Name
-             << " is set but Inst{" << i <<"} is unset!\n"
+      errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " << Name
+             << " is set but Inst{" << i << "} is unset!\n"
              << "  - You can only mark a bit as SoftFail if it is fully defined"
              << " (1/0 - not '?') in Inst\n";
-      o << "#error SoftFail Conflict, " << Name << "::SoftFail{" << i 
-        << "} set but Inst{" << i << "} undefined!\n";
+      return;
     }
   }
 
@@ -930,27 +1276,31 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
   if (!NeedPositiveMask && !NeedNegativeMask)
     return;
 
-  std::string PositiveMaskStr = PositiveMask.toString(16, /*signed=*/false);
-  std::string NegativeMaskStr = NegativeMask.toString(16, /*signed=*/false);
-  StringRef BitExt = "";
-  if (BitWidth > 32)
-    BitExt = "ULL";
-
-  o.indent(Indentation) << "if (";
-  if (NeedPositiveMask)
-    o << "insn & 0x" << PositiveMaskStr << BitExt;
-  if (NeedPositiveMask && NeedNegativeMask)
-    o << " || ";
-  if (NeedNegativeMask)
-    o << "~insn & 0x" << NegativeMaskStr << BitExt;
-  o << ")\n";
-  o.indent(Indentation+2) << "S = MCDisassembler::SoftFail;\n";
+  TableInfo.Table.push_back(MCD::OPC_SoftFail);
+
+  SmallString<16> MaskBytes;
+  raw_svector_ostream S(MaskBytes);
+  if (NeedPositiveMask) {
+    encodeULEB128(PositiveMask.getZExtValue(), S);
+    S.flush();
+    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
+      TableInfo.Table.push_back(MaskBytes[i]);
+  } else
+    TableInfo.Table.push_back(0);
+  if (NeedNegativeMask) {
+    MaskBytes.clear();
+    S.resync();
+    encodeULEB128(NegativeMask.getZExtValue(), S);
+    S.flush();
+    for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i)
+      TableInfo.Table.push_back(MaskBytes[i]);
+  } else
+    TableInfo.Table.push_back(0);
 }
 
-// Emits code to decode the singleton.  Return true if we have matched all the
-// well-known bits.
-bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                                         unsigned Opc) const {
+// Emits table entries to decode the singleton.
+void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                                            unsigned Opc) const {
   std::vector<unsigned> StartBits;
   std::vector<unsigned> EndBits;
   std::vector<uint64_t> FieldVals;
@@ -961,107 +1311,70 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
   getIslands(StartBits, EndBits, FieldVals, Insn);
 
   unsigned Size = StartBits.size();
-  unsigned I, NumBits;
 
-  // If we have matched all the well-known bits, just issue a return.
-  if (Size == 0) {
-    o.indent(Indentation) << "if (";
-    if (!emitPredicateMatch(o, Indentation, Opc))
-      o << "1";
-    o << ") {\n";
-    emitSoftFailCheck(o, Indentation+2, Opc);
-    o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
-    std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
-      Operands.find(Opc);
-    const std::vector<OperandInfo>& InsnOperands = OpIter->second;
-    for (std::vector<OperandInfo>::const_iterator
-         I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
-      // If a custom instruction decoder was specified, use that.
-      if (I->numFields() == 0 && I->Decoder.size()) {
-        o.indent(Indentation) << "  " << Emitter->GuardPrefix << I->Decoder
-                              << "(MI, insn, Address, Decoder)"
-                              << Emitter->GuardPostfix << "\n";
-        break;
-      }
-
-      emitBinaryParser(o, Indentation, *I);
-    }
-
-    o.indent(Indentation) << "  return " << Emitter->ReturnOK << "; // "
-                          << nameWithID(Opc) << '\n';
-    o.indent(Indentation) << "}\n"; // Closing predicate block.
-    return true;
-  }
-
-  // Otherwise, there are more decodings to be done!
-
-  // Emit code to match the island(s) for the singleton.
-  o.indent(Indentation) << "// Check ";
-
-  for (I = Size; I != 0; --I) {
-    o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} ";
-    if (I > 1)
-      o << " && ";
-    else
-      o << "for singleton decoding...\n";
-  }
-
-  o.indent(Indentation) << "if (";
-  if (emitPredicateMatch(o, Indentation, Opc)) {
-    o << " &&\n";
-    o.indent(Indentation+4);
+  // Emit the predicate table entry if one is needed.
+  emitPredicateTableEntry(TableInfo, Opc);
+
+  // Check any additional encoding fields needed.
+  for (unsigned I = Size; I != 0; --I) {
+    unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1;
+    TableInfo.Table.push_back(MCD::OPC_CheckField);
+    TableInfo.Table.push_back(StartBits[I-1]);
+    TableInfo.Table.push_back(NumBits);
+    uint8_t Buffer[16], *p; // A uint64_t ULEB128 can need up to 10 bytes.
+    encodeULEB128(FieldVals[I-1], Buffer);
+    for (p = Buffer; *p >= 128 ; ++p)
+      TableInfo.Table.push_back(*p);
+    TableInfo.Table.push_back(*p);
+    // Push location for NumToSkip backpatching.
+    TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
+    // The fixup is always 16-bits, so go ahead and allocate the space
+    // in the table so all our relative position calculations work OK even
+    // before we fully resolve the real value here.
+    TableInfo.Table.push_back(0);
+    TableInfo.Table.push_back(0);
   }
 
-  for (I = Size; I != 0; --I) {
-    NumBits = EndBits[I-1] - StartBits[I-1] + 1;
-    o << "fieldFromInstruction" << BitWidth << "(insn, "
-      << StartBits[I-1] << ", " << NumBits
-      << ") == " << FieldVals[I-1];
-    if (I > 1)
-      o << " && ";
-    else
-      o << ") {\n";
-  }
-  emitSoftFailCheck(o, Indentation+2, Opc);
-  o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
-  std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
-    Operands.find(Opc);
-  const std::vector<OperandInfo>& InsnOperands = OpIter->second;
-  for (std::vector<OperandInfo>::const_iterator
-       I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
-    // If a custom instruction decoder was specified, use that.
-    if (I->numFields() == 0 && I->Decoder.size()) {
-      o.indent(Indentation) << "  " << Emitter->GuardPrefix << I->Decoder
-                            << "(MI, insn, Address, Decoder)"
-                            << Emitter->GuardPostfix << "\n";
-      break;
-    }
-
-    emitBinaryParser(o, Indentation, *I);
-  }
-  o.indent(Indentation) << "  return " << Emitter->ReturnOK << "; // "
-                        << nameWithID(Opc) << '\n';
-  o.indent(Indentation) << "}\n";
-
-  return false;
+  // Check for soft failure of the match.
+  emitSoftFailTableEntry(TableInfo, Opc);
+
+  TableInfo.Table.push_back(MCD::OPC_Decode);
+  uint8_t Buffer[8], *p;
+  encodeULEB128(Opc, Buffer);
+  for (p = Buffer; *p >= 128 ; ++p)
+    TableInfo.Table.push_back(*p);
+  TableInfo.Table.push_back(*p);
+
+  unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc);
+  SmallString<16> Bytes;
+  raw_svector_ostream S(Bytes);
+  encodeULEB128(DIdx, S);
+  S.flush();
+
+  // Decoder index
+  for (unsigned i = 0, e = Bytes.size(); i != e; ++i)
+    TableInfo.Table.push_back(Bytes[i]);
 }
 
-// Emits code to decode the singleton, and then to decode the rest.
-void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                                         const Filter &Best) const {
-
+// Emits table entries to decode the singleton, and then to decode the rest.
+void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
+                                            const Filter &Best) const {
   unsigned Opc = Best.getSingletonOpc();
 
-  emitSingletonDecoder(o, Indentation, Opc);
+  // Open a new fixup scope: checks emitted for the singleton must refer
+  // forward to the variable FilterChooser's entries that follow it.
+  TableInfo.FixupStack.push_back(FixupList());
 
-  // Emit code for the rest.
-  o.indent(Indentation) << "else\n";
+  emitSingletonTableEntry(TableInfo, Opc);
 
-  Indentation += 2;
-  Best.getVariableFC().emit(o, Indentation);
-  Indentation -= 2;
+  resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
+                     TableInfo.Table.size());
+  TableInfo.FixupStack.pop_back();
+
+  Best.getVariableFC().emitTableEntries(TableInfo);
 }
 
+
 // Assign a single filter and run with it.  Top level API client can initialize
 // with a single filter to start the filtering process.
 void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
@@ -1119,7 +1432,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
     }
   }
 
-  unsigned BitIndex, InsnIndex;
+  unsigned BitIndex;
 
   // We maintain BIT_WIDTH copies of the bitAttrs automaton.
   // The automaton consumes the corresponding bit from each
@@ -1149,7 +1462,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
     else
       bitAttrs.push_back(ATTR_NONE);
 
-  for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
+  for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
     insn_t insn;
 
     insnWithID(insn, Opcodes[InsnIndex]);
@@ -1200,7 +1513,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
   bitAttr_t RA = ATTR_NONE;
   unsigned StartBit = 0;
 
-  for (BitIndex = 0; BitIndex < BitWidth; BitIndex++) {
+  for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) {
     bitAttr_t bitAttr = bitAttrs[BitIndex];
 
     assert(bitAttr != ATTR_NONE && "Bit without attributes");
@@ -1341,36 +1654,29 @@ void FilterChooser::doFilter() {
   BestIndex = -1;
 }
 
-// Emits code to decode our share of instructions.  Returns true if the
-// emitted code causes a return, which occurs if we know how to decode
-// the instruction at this level or the instruction is not decodeable.
-bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const {
-  if (Opcodes.size() == 1)
+// emitTableEntries - Emit state machine entries to decode our share of
+// instructions.
+void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
+  if (Opcodes.size() == 1) {
     // There is only one instruction in the set, which is great!
     // Call emitSingletonDecoder() to see whether there are any remaining
     // encodings bits.
-    return emitSingletonDecoder(o, Indentation, Opcodes[0]);
+    emitSingletonTableEntry(TableInfo, Opcodes[0]);
+    return;
+  }
 
   // Choose the best filter to do the decodings!
   if (BestIndex != -1) {
     const Filter &Best = Filters[BestIndex];
     if (Best.getNumFiltered() == 1)
-      emitSingletonDecoder(o, Indentation, Best);
+      emitSingletonTableEntry(TableInfo, Best);
     else
-      Best.emit(o, Indentation);
-    return false;
+      Best.emitTableEntry(TableInfo);
+    return;
   }
 
-  // We don't know how to decode these instructions!  Return 0 and dump the
-  // conflict set!
-  o.indent(Indentation) << "return 0;" << " // Conflict set: ";
-  for (int i = 0, N = Opcodes.size(); i < N; ++i) {
-    o << nameWithID(Opcodes[i]);
-    if (i < (N - 1))
-      o << ", ";
-    else
-      o << '\n';
-  }
+  // We don't know how to decode these instructions!  Dump the
+  // conflict set and bail.
 
   // Print out useful conflict information for postmortem analysis.
   errs() << "Decoding Conflict:\n";
@@ -1385,8 +1691,6 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const {
              getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
     errs() << '\n';
   }
-
-  return true;
 }
 
 static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
@@ -1549,62 +1853,168 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
   return true;
 }
 
-static void emitHelper(llvm::raw_ostream &o, unsigned BitWidth) {
-  unsigned Indentation = 0;
-  std::string WidthStr = "uint" + utostr(BitWidth) + "_t";
-
-  o << '\n';
-
-  o.indent(Indentation) << "static " << WidthStr <<
-    " fieldFromInstruction" << BitWidth <<
-    "(" << WidthStr <<" insn, unsigned startBit, unsigned numBits)\n";
-
-  o.indent(Indentation) << "{\n";
-
-  ++Indentation; ++Indentation;
-  o.indent(Indentation) << "assert(startBit + numBits <= " << BitWidth
-                        << " && \"Instruction field out of bounds!\");\n";
-  o << '\n';
-  o.indent(Indentation) << WidthStr << " fieldMask;\n";
-  o << '\n';
-  o.indent(Indentation) << "if (numBits == " << BitWidth << ")\n";
-
-  ++Indentation; ++Indentation;
-  o.indent(Indentation) << "fieldMask = (" << WidthStr << ")-1;\n";
-  --Indentation; --Indentation;
-
-  o.indent(Indentation) << "else\n";
-
-  ++Indentation; ++Indentation;
-  o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n";
-  --Indentation; --Indentation;
-
-  o << '\n';
-  o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n";
-  --Indentation; --Indentation;
-
-  o.indent(Indentation) << "}\n";
+// emitFieldFromInstruction - Emit the templated fieldFromInstruction() helper.
+// The mask is built in InsnType so fields reaching bit 31+ do not overflow int.
+static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
+  OS << "// Helper function for extracting fields from encoded instructions.\n"
+     << "template<typename InsnType>\n"
+   << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n"
+     << "                                     unsigned numBits) {\n"
+     << "    assert(startBit + numBits <= (sizeof(InsnType)*8) &&\n"
+     << "           \"Instruction field out of bounds!\");\n"
+     << "    InsnType fieldMask;\n"
+     << "    if (numBits == sizeof(InsnType)*8)\n"
+     << "      fieldMask = (InsnType)(-1LL);\n"
+     << "    else\n"
+     << "      fieldMask = (((InsnType)1 << numBits) - 1) << startBit;\n"
+     << "    return (insn & fieldMask) >> startBit;\n"
+     << "}\n\n";
+}
 
-  o << '\n';
+// emitDecodeInstruction - Emit the templated decodeInstruction() helper, the
+// interpreter that walks a decoder table; field values are kept in 64 bits.
+static void emitDecodeInstruction(formatted_raw_ostream &OS) {
+  OS << "template<typename InsnType>\n"
+     << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,\n"
+     << "                                      InsnType insn, uint64_t Address,\n"
+     << "                                      const void *DisAsm,\n"
+     << "                                      const MCSubtargetInfo &STI) {\n"
+     << "  uint64_t Bits = STI.getFeatureBits();\n"
+     << "\n"
+     << "  const uint8_t *Ptr = DecodeTable;\n"
+     << "  uint64_t CurFieldValue = 0;\n"
+     << "  DecodeStatus S = MCDisassembler::Success;\n"
+     << "  for (;;) {\n"
+     << "    ptrdiff_t Loc = Ptr - DecodeTable;\n"
+     << "    switch (*Ptr) {\n"
+     << "    default:\n"
+     << "      errs() << Loc << \": Unexpected decode table opcode!\\n\";\n"
+     << "      return MCDisassembler::Fail;\n"
+     << "    case MCD::OPC_ExtractField: {\n"
+     << "      unsigned Start = *++Ptr;\n"
+     << "      unsigned Len = *++Ptr;\n"
+     << "      ++Ptr;\n"
+     << "      CurFieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << \", \"\n"
+     << "                   << Len << \"): \" << CurFieldValue << \"\\n\");\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_FilterValue: {\n"
+     << "      // Decode the field value.\n"
+     << "      unsigned Len;\n"
+     << "      InsnType Val = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "\n"
+     << "      // Perform the filter operation.\n"
+     << "      if (Val != CurFieldValue)\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << \", \" << NumToSkip\n"
+     << "                   << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" : \"PASS:\")\n"
+     << "                   << \" continuing at \" << (Ptr - DecodeTable) << \"\\n\");\n"
+     << "\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_CheckField: {\n"
+     << "      unsigned Start = *++Ptr;\n"
+     << "      unsigned Len = *++Ptr;\n"
+     << "      InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n"
+     << "      // Decode the field value.\n"
+     << "      uint64_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "\n"
+     << "      // If the actual and expected values don't match, skip.\n"
+     << "      if (ExpectedValue != FieldValue)\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << \", \"\n"
+     << "                   << Len << \", \" << ExpectedValue << \", \" << NumToSkip\n"
+     << "                   << \"): FieldValue = \" << FieldValue << \", ExpectedValue = \"\n"
+     << "                   << ExpectedValue << \": \"\n"
+     << "                   << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : \"FAIL\\n\"));\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_CheckPredicate: {\n"
+     << "      unsigned Len;\n"
+     << "      // Decode the Predicate Index value.\n"
+     << "      unsigned PIdx = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      // NumToSkip is a plain 16-bit integer.\n"
+     << "      unsigned NumToSkip = *Ptr++;\n"
+     << "      NumToSkip |= (*Ptr++) << 8;\n"
+     << "      // Check the predicate.\n"
+     << "      bool Pred;\n"
+     << "      if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n"
+     << "        Ptr += NumToSkip;\n"
+     << "      (void)Pred;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx << \"): \"\n"
+     << "            << (Pred ? \"PASS\\n\" : \"FAIL\\n\"));\n"
+     << "\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_Decode: {\n"
+     << "      unsigned Len;\n"
+     << "      // Decode the Opcode value.\n"
+     << "      unsigned Opc = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n"
+     << "                   << \", using decoder \" << DecodeIdx << \"\\n\" );\n"
+     << "      DEBUG(dbgs() << \"----- DECODE SUCCESSFUL -----\\n\");\n"
+     << "\n"
+     << "      MI.setOpcode(Opc);\n"
+     << "      return decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm);\n"
+     << "    }\n"
+     << "    case MCD::OPC_SoftFail: {\n"
+     << "      // Decode the mask values.\n"
+     << "      unsigned Len;\n"
+     << "      InsnType PositiveMask = decodeULEB128(++Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      InsnType NegativeMask = decodeULEB128(Ptr, &Len);\n"
+     << "      Ptr += Len;\n"
+     << "      bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n"
+     << "      if (Fail)\n"
+     << "        S = MCDisassembler::SoftFail;\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? \"FAIL\\n\":\"PASS\\n\"));\n"
+     << "      break;\n"
+     << "    }\n"
+     << "    case MCD::OPC_Fail: {\n"
+     << "      DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n"
+     << "      return MCDisassembler::Fail;\n"
+     << "    }\n"
+     << "    }\n"
+     << "  }\n"
+     << "  llvm_unreachable(\"bogosity detected in disassembler state machine!\");\n"
+     << "}\n\n";
 }
 
 // Emits disassembler code for instruction decoding.
 void FixedLenDecoderEmitter::run(raw_ostream &o) {
-  o << "#include \"llvm/MC/MCInst.h\"\n";
-  o << "#include \"llvm/Support/DataTypes.h\"\n";
-  o << "#include <assert.h>\n";
-  o << '\n';
-  o << "namespace llvm {\n\n";
+  formatted_raw_ostream OS(o);
+  OS << "#include \"llvm/MC/MCInst.h\"\n";
+  OS << "#include \"llvm/Support/Debug.h\"\n";
+  OS << "#include \"llvm/Support/DataTypes.h\"\n";
+  OS << "#include \"llvm/Support/LEB128.h\"\n";
+  OS << "#include \"llvm/Support/raw_ostream.h\"\n";
+  OS << "#include <assert.h>\n";
+  OS << '\n';
+  OS << "namespace llvm {\n\n";
+
+  emitFieldFromInstruction(OS);
 
   // Parameterize the decoders based on namespace and instruction width.
-  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
-    Target.getInstructionsByEnumValue();
+  NumberedInstructions = &Target.getInstructionsByEnumValue();
   std::map<std::pair<std::string, unsigned>,
            std::vector<unsigned> > OpcMap;
   std::map<unsigned, std::vector<OperandInfo> > Operands;
 
-  for (unsigned i = 0; i < NumberedInstructions.size(); ++i) {
-    const CodeGenInstruction *Inst = NumberedInstructions[i];
+  for (unsigned i = 0; i < NumberedInstructions->size(); ++i) {
+    const CodeGenInstruction *Inst = NumberedInstructions->at(i);
     const Record *Def = Inst->TheDef;
     unsigned Size = Def->getValueAsInt("Size");
     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
@@ -1622,24 +2032,48 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
     }
   }
 
+  DecoderTableInfo TableInfo;
   std::set<unsigned> Sizes;
   for (std::map<std::pair<std::string, unsigned>,
                 std::vector<unsigned> >::const_iterator
        I = OpcMap.begin(), E = OpcMap.end(); I != E; ++I) {
-    // If we haven't visited this instruction width before, emit the
-    // helper method to extract fields.
-    if (!Sizes.count(I->first.second)) {
-      emitHelper(o, 8*I->first.second);
-      Sizes.insert(I->first.second);
-    }
-
     // Emit the decoder for this namespace+width combination.
-    FilterChooser FC(NumberedInstructions, I->second, Operands,
+    FilterChooser FC(*NumberedInstructions, I->second, Operands,
                      8*I->first.second, this);
-    FC.emitTop(o, 0, I->first.first);
+
+    // The decode table is cleared for each top level decoder function. The
+    // predicates and decoders themselves, however, are shared across all
+    // decoders to give more opportunities for uniqueing.
+    TableInfo.Table.clear();
+    TableInfo.FixupStack.clear();
+    TableInfo.Table.reserve(16384);
+    TableInfo.FixupStack.push_back(FixupList());
+    FC.emitTableEntries(TableInfo);
+    // Any NumToSkip fixups in the top level scope can resolve to the
+    // OPC_Fail at the end of the table.
+    assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
+    // Resolve any NumToSkip fixups in the current scope.
+    resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
+                       TableInfo.Table.size());
+    TableInfo.FixupStack.clear();
+
+    TableInfo.Table.push_back(MCD::OPC_Fail);
+
+    // Print the table to the output stream.
+    emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), I->first.first);
+    OS.flush();
   }
 
-  o << "\n} // End llvm namespace \n";
+  // Emit the predicate function.
+  emitPredicateFunction(OS, TableInfo.Predicates, 0);
+
+  // Emit the decoder function.
+  emitDecoderFunction(OS, TableInfo.Decoders, 0);
+
+  // Emit the main entry point for the decoder, decodeInstruction().
+  emitDecodeInstruction(OS);
+
+  OS << "\n} // End llvm namespace\n";
 }
 
 namespace llvm {
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 3adb8694d9ab..b41ad94aca39 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -319,6 +319,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
   if (Inst.isCompare)          OS << "|(1<<MCID::Compare)";
   if (Inst.isMoveImm)          OS << "|(1<<MCID::MoveImm)";
   if (Inst.isBitcast)          OS << "|(1<<MCID::Bitcast)";
+  if (Inst.isSelect)           OS << "|(1<<MCID::Select)";
   if (Inst.isBarrier)          OS << "|(1<<MCID::Barrier)";
   if (Inst.hasDelaySlot)       OS << "|(1<<MCID::DelaySlot)";
   if (Inst.isCall)             OS << "|(1<<MCID::Call)";
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index b9138780aa8d..02546dfca715 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -145,9 +145,9 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS,
     if (!Namespace.empty())
       OS << "namespace " << Namespace << " {\n";
     OS << "enum {\n  NoSubRegister,\n";
-    for (unsigned i = 0, e = Bank.getNumNamedIndices(); i != e; ++i)
+    for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
       OS << "  " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
-    OS << "  NUM_TARGET_NAMED_SUBREGS\n};\n";
+    OS << "  NUM_TARGET_SUBREGS\n};\n";
     if (!Namespace.empty())
       OS << "}\n";
   }
@@ -885,17 +885,6 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
   }
   OS << "\" };\n\n";
 
-  // Emit names of the anonymous subreg indices.
-  unsigned NamedIndices = RegBank.getNumNamedIndices();
-  if (SubRegIndices.size() > NamedIndices) {
-    OS << "  enum {";
-    for (unsigned i = NamedIndices, e = SubRegIndices.size(); i != e; ++i) {
-      OS << "\n    " << SubRegIndices[i]->getName() << " = " << i+1;
-      if (i+1 != e)
-        OS << ',';
-    }
-    OS << "\n  };\n\n";
-  }
   OS << "\n";
 
   // Now that all of the structs have been emitted, emit the instances.
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index 039868da7860..25bbcbd9f275 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -566,6 +566,9 @@ def main(builtinParameters = {}):    # Bump the GIL check interval, its more imp
     if opts.maxTests is not None:
         tests = tests[:opts.maxTests]
 
+    # Don't create more threads than tests.
+    opts.numThreads = min(len(tests), opts.numThreads)
+
     extra = ''
     if len(tests) != numTotalTests:
         extra = ' of %d' % numTotalTests
@@ -589,9 +592,6 @@ def main(builtinParameters = {}):    # Bump the GIL check interval, its more imp
         else:
             print header
 
-    # Don't create more threads than tests.
-    opts.numThreads = min(len(tests), opts.numThreads)
-
     startTime = time.time()
     display = TestingProgressDisplay(opts, len(tests), progressBar)
     provider = TestProvider(tests, opts.maxTime)